diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5c5da50c..108dcf78 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 # Contributing
 Thank you for considering improving this project! By participating, you
-agree to abide by the [code of conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md).
+agree to abide by the [code of conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html).
 
 # Issues (Reporting a problem or suggestion)
 If you've experienced a problem with the package, or have a suggestion for it,
@@ -17,6 +17,7 @@ We'll do our best to answer your question.
 # Pull Requests (Making changes to the package)
 
 We appreciate pull requests that follow these guidelines:
+
 1) Make sure that tests pass (and add new ones if possible).
 
 2) Do your best to conform to the code style of the package, currently
diff --git a/R/api_process_extract.R b/R/api_process_extract.R
index dc43f63a..2b269713 100644
--- a/R/api_process_extract.R
+++ b/R/api_process_extract.R
@@ -766,7 +766,7 @@ extract_is_completed_and_has_links.micro_extract <- function(extract) {
 
   is_complete <- extract$status == "completed"
   has_codebook <- has_url(download_links, "ddi_codebook")
-  has_data <- has_url(download_links, "data")
+  has_data <- has_url(download_links, "data")
 
   is_complete && has_codebook && has_data
 }
diff --git a/R/micro_read_chunked.R b/R/micro_read_chunked.R
index 0fac526d..8f997597 100644
--- a/R/micro_read_chunked.R
+++ b/R/micro_read_chunked.R
@@ -168,25 +168,25 @@
 #' # the full dataset in memory
 #' if (requireNamespace("biglm")) {
 #'   lm_results <- read_ipums_micro_chunked(
-#'     ipums_example("cps_00160.xml"),
-#'     IpumsBiglmCallback$new(
-#'       INCTOT ~ AGE + HEALTH, # Model formula
-#'       function(x, pos) {
-#'         x %>%
-#'           mutate(
-#'             INCTOT = lbl_na_if(
-#'               INCTOT,
-#'               ~ grepl("Missing|N.I.U.", .lbl)
-#'             ),
-#'             HEALTH = as_factor(HEALTH)
-#'           )
-#'       }
-#'     ),
-#'     chunk_size = 1000,
-#'     verbose = FALSE
-#'   )
+#'   ipums_example("cps_00160.xml"),
+#'   IpumsBiglmCallback$new(
+#'     INCTOT ~ AGE + HEALTH, # Model formula
+#'     function(x, pos) {
+#'       x %>%
+#'         mutate(
+#'           INCTOT = lbl_na_if(
+#'             INCTOT,
+#'             ~ grepl("Missing|N.I.U.", .lbl)
+#'           ),
+#'           HEALTH = as_factor(HEALTH)
+#'         )
+#'     }
+#'   ),
+#'   chunk_size = 1000,
+#'   verbose = FALSE
+#' )
 #'
-#'   summary(lm_results)
+#' summary(lm_results)
 #' }
 read_ipums_micro_chunked <- function(
     ddi,
diff --git a/R/viewer.R b/R/viewer.R
index bf62c6d5..680e29cc 100644
--- a/R/viewer.R
+++ b/R/viewer.R
@@ -61,8 +61,10 @@ ipums_view <- function(x, out_file = NULL, launch = TRUE) {
   if (is.null(out_file)) {
     if (!launch) {
       rlang::warn(c(
-        paste0("Some operating systems may have trouble opening an HTML ",
-               "file from a temporary directory."),
+        paste0(
+          "Some operating systems may have trouble opening an HTML ",
+          "file from a temporary directory."
+        ),
         "i" = "Use `out_file` to specify an alternate output location."
       ))
     }
diff --git a/README.Rmd b/README.Rmd
index f4728d6a..5b1c4da4 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -53,7 +53,7 @@ remotes::install_github("ipums/ipumsr")
 
 ## What is IPUMS?
 
-[IPUMS](https://www.ipums.org/mission-purpose) is the world's largest
+[IPUMS](https://www.ipums.org) is the world's largest
 publicly available population database, providing census and survey
 data from around the world integrated across time and space. IPUMS
 integration and documentation make it easy to study change, conduct
@@ -61,7 +61,7 @@ comparative research, merge information across data types, and analyze
 individuals within family and community context.
Data and services are available free of charge. -IPUMS consists of multiple projects, or collections, that provide +IPUMS consists of multiple projects, or *collections*, that provide different data products. - **Microdata** projects distribute data for individual survey units, @@ -71,7 +71,7 @@ statistics for particular geographic units along with corresponding GIS mapping files. ipumsr supports different levels of functionality for each IPUMS project, as -summarized in the following table: +summarized in the table below. ```{r} #| echo: false @@ -88,90 +88,90 @@ tbl_config <- list( list( img = "", proj = "IPUMS USA", - type = "Microdata", - desc = "U.S. Census and American Community Survey microdata (1850-present)", - read = checkmark(), - request = checkmark(), + type = "Microdata", + desc = "U.S. Census and American Community Survey microdata (1850-present)", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", - proj = "IPUMS CPS", - type = "Microdata", - desc = "Current Population Survey microdata including basic monthly surveys and supplements (1962-present)", - read = checkmark(), - request = checkmark(), + proj = "IPUMS CPS", + type = "Microdata", + desc = "Current Population Survey microdata including basic monthly surveys and supplements (1962-present)", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", proj = "IPUMS International", - type = "Microdata", - desc = "Census microdata covering over 100 countries, contemporary and historical", - read = checkmark(), - request = checkmark(), + type = "Microdata", + desc = "Census microdata covering over 100 countries, contemporary and historical", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", - proj = "IPUMS NHGIS", - type = "Aggregate Data", - desc = "Tabular U.S. Census data and GIS mapping files (1790-present)", + proj = "IPUMS NHGIS", + type = "Aggregate Data", + desc = "Tabular U.S. Census data and GIS mapping files (1790-present)", read = checkmark(), - request = checkmark(), + request = checkmark(), metadata = checkmark() ), list( img = "", - proj = "IPUMS IHGIS", - type = "Aggregate Data", - desc = "Tabular and GIS data from population, housing, and agricultural censuses around the world", - read = "", - request = "", + proj = "IPUMS IHGIS", + type = "Aggregate Data", + desc = "Tabular and GIS data from population, housing, and agricultural censuses around the world", + read = "", + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Time Use", - type = "Microdata", - desc = "Time use microdata from the U.S. (1930-present) and thirteen other countries (1965-present)", - read = checkmark(), - request = "", + proj = "IPUMS Time Use", + type = "Microdata", + desc = "Time use microdata from the U.S. (1930-present) and thirteen other countries (1965-present)", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Health Surveys", - type = "Microdata", + proj = "IPUMS Health Surveys", + type = "Microdata", desc = paste0( "Microdata from the U.S. 
", "National Health Interview Survey (NHIS) (1963-present) and ", "Medical Expenditure Panel Survey (MEPS) (1996-present)" ), - read = checkmark(), - request = "", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Global Health", + proj = "IPUMS Global Health", type = "Microdata", desc = paste0( "Health survey microdata for low- and middle-income countries, including ", "harmonized data collections for Demographic and Health Surveys (DHS) ", "and Performance Monitoring for Action (PMA) surveys" ), - read = checkmark(), - request = "", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Higher Ed", - type = "Microdata", - desc = "Survey microdata on the science and engineering workforce in the U.S. from 1993 to 2013", - read = checkmark(), - request = "", + proj = "IPUMS Higher Ed", + type = "Microdata", + desc = "Survey microdata on the science and engineering workforce in the U.S. from 1993 to 2013", + read = checkmark(), + request = "", metadata = "" ) ) @@ -196,25 +196,28 @@ knitr::kable( ipumsr uses the [IPUMS API](https://developer.ipums.org/) to submit data requests, download data extracts, and get metadata, so the scope of -ipumsr functionality generally corresponds to the [available API -functionality](https://developer.ipums.org/docs/v2/apiprogram/apis/). As +functionality generally corresponds to that [available via the API](https://developer.ipums.org/docs/v2/apiprogram/apis/). As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly. ## Getting started If you're new to IPUMS data, learn more about what's available through -the [IPUMS Projects Overview](https://www.ipums.org/overview). +the [IPUMS Projects Overview](https://www.ipums.org/overview). Then, see +`vignette("ipums")` for an overview of how to obtain IPUMS data. -The package vignettes are the best place to learn about what's available in -ipumsr itself: +The package vignettes are the best place to explore what ipumsr has to offer: - To read IPUMS data extracts into R, see `vignette("ipums-read")`. -- To interact with the IPUMS extract system via the IPUMS API, see - `vignette("ipums-api")`. + +- To interact with the IPUMS extract and metadata system via the IPUMS API, + see `vignette("ipums-api")`. + - For additional details about microdata and NHGIS extract requests, see `vignette("ipums-api-micro")` and `vignette("ipums-api-nhgis")`. + - To work with labelled values in IPUMS data, see `vignette("value-labels")`. + - For techniques for working with large data extracts, see `vignette("ipums-bigdata")`. @@ -243,9 +246,9 @@ We greatly appreciate feedback and development contributions. Please submit any bug reports, pull requests, or other suggestions on [GitHub](https://github.com/ipums/ipumsr/issues). Before contributing, please be sure to read the [Contributing -Guidelines](https://github.com/ipums/ipumsr/blob/master/CONTRIBUTING.md) -and the [Code of -Conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md). +Guidelines](https://tech.popdata.org/ipumsr/CONTRIBUTING.html) +and the +[Code of Conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html). If you have general questions or concerns about IPUMS data, check out our [user forum](https://forum.ipums.org) or send an email to diff --git a/README.md b/README.md index a4c61cbe..59288cba 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,15 @@ remotes::install_github("ipums/ipumsr") ## What is IPUMS? 
-[IPUMS](https://www.ipums.org/mission-purpose) is the world’s largest -publicly available population database, providing census and survey data -from around the world integrated across time and space. IPUMS -integration and documentation make it easy to study change, conduct -comparative research, merge information across data types, and analyze -individuals within family and community context. Data and services are -available free of charge. - -IPUMS consists of multiple projects, or collections, that provide +[IPUMS](https://www.ipums.org) is the world’s largest publicly available +population database, providing census and survey data from around the +world integrated across time and space. IPUMS integration and +documentation make it easy to study change, conduct comparative +research, merge information across data types, and analyze individuals +within family and community context. Data and services are available +free of charge. + +IPUMS consists of multiple projects, or *collections*, that provide different data products. - **Microdata** projects distribute data for individual survey units, @@ -60,7 +60,7 @@ different data products. GIS mapping files. ipumsr supports different levels of functionality for each IPUMS -project, as summarized in the following table: +project, as summarized in the table below. @@ -298,26 +298,31 @@ from 1993 to 2013 ipumsr uses the [IPUMS API](https://developer.ipums.org/) to submit data requests, download data extracts, and get metadata, so the scope of -ipumsr functionality generally corresponds to the [available API -functionality](https://developer.ipums.org/docs/v2/apiprogram/apis/). As -the IPUMS team extends the API to support more functionality for more -projects, we aim to extend ipumsr capabilities accordingly. +functionality generally corresponds to that [available via the +API](https://developer.ipums.org/docs/v2/apiprogram/apis/). As the IPUMS +team extends the API to support more functionality for more projects, we +aim to extend ipumsr capabilities accordingly. ## Getting started If you’re new to IPUMS data, learn more about what’s available through -the [IPUMS Projects Overview](https://www.ipums.org/overview). +the [IPUMS Projects Overview](https://www.ipums.org/overview). Then, see +`vignette("ipums")` for an overview of how to obtain IPUMS data. -The package vignettes are the best place to learn about what’s available -in ipumsr itself: +The package vignettes are the best place to explore what ipumsr has to +offer: - To read IPUMS data extracts into R, see `vignette("ipums-read")`. -- To interact with the IPUMS extract system via the IPUMS API, see - `vignette("ipums-api")`. + +- To interact with the IPUMS extract and metadata system via the IPUMS + API, see `vignette("ipums-api")`. + - For additional details about microdata and NHGIS extract requests, see `vignette("ipums-api-micro")` and `vignette("ipums-api-nhgis")`. + - To work with labelled values in IPUMS data, see `vignette("value-labels")`. + - For techniques for working with large data extracts, see `vignette("ipums-bigdata")`. @@ -346,9 +351,8 @@ We greatly appreciate feedback and development contributions. Please submit any bug reports, pull requests, or other suggestions on [GitHub](https://github.com/ipums/ipumsr/issues). Before contributing, please be sure to read the [Contributing -Guidelines](https://github.com/ipums/ipumsr/blob/master/CONTRIBUTING.md) -and the [Code of -Conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md). 
+Guidelines](https://tech.popdata.org/ipumsr/CONTRIBUTING.html) and the
+[Code of Conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html).

If you have general questions or concerns about IPUMS data, check out
our [user forum](https://forum.ipums.org) or send an email to

diff --git a/docs/CONDUCT.html b/docs/CONDUCT.html
deleted file mode 100644
index d3f87ab2..00000000
--- a/docs/CONDUCT.html
+++ /dev/null
@@ -1,112 +0,0 @@
-Contributor Code of Conduct • ipumsr
-As contributors and maintainers of this project, we pledge to respect all
-people who contribute through reporting issues, posting feature requests,
-updating documentation, submitting pull requests or patches, and other
-activities.
-
-We are committed to making participation in this project a harassment-free
-experience for everyone, regardless of level of experience, gender, gender
-identity and expression, sexual orientation, disability, personal appearance,
-body size, race, ethnicity, age, or religion.
-
-Examples of unacceptable behavior by participants include the use of sexual
-language or imagery, derogatory comments or personal attacks, trolling,
-public or private harassment, insults, or other unprofessional conduct.
-
-Project maintainers have the right and responsibility to remove, edit, or
-reject comments, commits, code, wiki edits, issues, and other contributions
-that are not aligned to this Code of Conduct. Project maintainers who do not
-follow the Code of Conduct may be removed from the project team.
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by opening an issue or contacting one or more of the project
-maintainers.
-
-This Code of Conduct is adapted from the Contributor Covenant
-(http://contributor-covenant.org), version 1.0.0, available at
-http://contributor-covenant.org/version/1/0/0/
diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html
index 903aef60..08651bbe 100644
--- a/docs/CONTRIBUTING.html
+++ b/docs/CONTRIBUTING.html
@@ -84,7 +84,7 @@
-Thank you for considering improving this project! By participating, you
-agree to abide by the code of conduct.
+Thank you for considering improving this project! By participating, you
+agree to abide by the code of conduct.
Issues (Reporting a problem or suggestion)

@@ -93,8 +93,9 @@

Issues (Reporting a problem or

Pull Requests (Making changes to the package)

-

-We appreciate pull requests that follow these guidelines: 1) Make sure that tests pass (and add new ones if possible).
-
-  1. Do your best to conform to the code style of the package, currently based on the tidyverse style guide. See the styler package to easily catch stylistic errors.
-  2. Please add your name and affiliation to the NOTICE.txt file.
-  3. Summarize your changes in the NEWS.md file.
+We appreciate pull requests that follow these guidelines:
+
+  1. Make sure that tests pass (and add new ones if possible).
+  2. Do your best to conform to the code style of the package, currently based on the tidyverse style guide. See the styler package to easily catch stylistic errors (a usage sketch follows this list).
+  3. Please add your name and affiliation to the NOTICE.txt file.
+  4. Summarize your changes in the NEWS.md file.
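For the style guideline above, the styler package can report or fix style issues; a minimal sketch (the `dry` argument is available in recent styler releases):

```r
# install.packages("styler")
library(styler)

# Report which files would be restyled, without modifying them
style_pkg(dry = "on")
```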

    diff --git a/docs/articles/cps_select_data.jpg b/docs/articles/cps_select_data.jpg new file mode 100644 index 00000000..1b2c8879 Binary files /dev/null and b/docs/articles/cps_select_data.jpg differ diff --git a/docs/articles/ipums-api-micro.html b/docs/articles/ipums-api-micro.html index 5de7916b..98ff9856 100644 --- a/docs/articles/ipums-api-micro.html +++ b/docs/articles/ipums-api-micro.html @@ -144,8 +144,8 @@

Supported microdata collections

In addition to microdata projects, the IPUMS API also supports IPUMS NHGIS data. For details about obtaining IPUMS NHGIS data using ipumsr, see the NHGIS-specific vignette.

    -

-Before getting started, we'll load ipumsr and dplyr, which will be helpful for
-this demo:
+Before getting started, we'll load ipumsr and dplyr,
+which will be helpful for this demo:
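The setup chunk itself falls outside this hunk; presumably it is just the two library calls:

```r
library(ipumsr)
library(dplyr)
```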

    @@ -203,7 +203,7 @@

    IPUMS microdata metadata (forthcom
     ipumsi_samps <- get_sample_info("ipumsi")
     
    -ipumsi_samps %>% 
    +ipumsi_samps %>%
       filter(grepl("Mexico", description))
     #> # A tibble: 70 × 2
     #>    name    description       
    @@ -426,8 +426,8 @@ 

    Case selections # For detailed case selection, change the `case_selection_type` var_spec( - "RACE", - case_selections = c("811", "812"), + "RACE", + case_selections = c("811", "812"), case_selection_type = "detailed" ) #> $name @@ -478,7 +478,8 @@

    Attached characteristicsattached_characteristics argument of var_spec().

    -

    For instance, to attach the spouse’s SEX value to a record:

    +

    For instance, to attach the spouse’s SEX value to a +record:

     var_spec("SEX", attached_characteristics = "spouse")
     #> $name
    @@ -489,9 +490,9 @@ 

    Attached characteristics#> #> attr(,"class") #> [1] "var_spec" "ipums_spec" "list"

    -

    This will add a new variable (in this case, SEX_SP) to the output -data that will contain the sex of a person’s spouse (if no such record -exists, the value will be 0).

    +

    This will add a new variable (in this case, SEX_SP) to +the output data that will contain the sex of a person’s spouse (if no +such record exists, the value will be 0).

    Multiple attached characteristics can be attached for a single variable:
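The code chunk for this case is cut off by the hunk boundary; based on the var_spec() pattern above, it presumably looks something like the following (the variable and characteristics are illustrative):

```r
# Attach both the mother's and father's AGE to each person record
var_spec("AGE", attached_characteristics = c("mother", "father"))
```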

    -

-This will produce a new variable (QRACE) containing the data quality
-flag for the given variable.
+This will produce a new variable (QRACE) containing the
+data quality flag for the given variable.

    To add data quality flags for all variables that have them, set data_quality_flags = TRUE in your extract definition directly:
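The example itself is truncated here; a sketch of such a definition, assuming the extract-level data_quality_flags argument (sample ID and variables hypothetical):

```r
define_extract_cps(
  description = "Example with all available data quality flags",
  samples = "cps2022_03s",
  variables = c("AGE", "SEX", "RACE"),
  data_quality_flags = TRUE
)
```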

    diff --git a/docs/articles/ipums-api-nhgis.html b/docs/articles/ipums-api-nhgis.html index a67fe991..79716f76 100644 --- a/docs/articles/ipums-api-nhgis.html +++ b/docs/articles/ipums-api-nhgis.html @@ -188,14 +188,14 @@

Summary metadata

#> 4 1820_cPop 1820 Census Population Data [US, States & Counties]  401
#> 5 1830_cPop 1830 Census Population Data [US, States & Counties]  501
#> 6 1840_cAg  1840 Census Agriculture Data [US, States & Counties] 601

    -

    We can use basic functions from dplyr to filter the metadata to -those records of interest. For instance, if we wanted to find all the -data sources related to agriculture from the 1900 Census, we could -filter on group and description:

    +

    We can use basic functions from dplyr to filter the +metadata to those records of interest. For instance, if we wanted to +find all the data sources related to agriculture from the 1900 Census, +we could filter on group and description:

    -ds %>% 
    +ds %>%
       filter(
    -    group == "1900 Census", 
    +    group == "1900 Census",
         grepl("Agriculture", description)
       )
     #> # A tibble: 2 × 4
    @@ -249,8 +249,9 @@ 

Summary metadata

#>   <chr>  <chr>         <int>
#> 1 state  State             4
#> 2 county State--County    25

    -

    To filter on these columns, we can use map_lgl() from purrr. For instance, to find all -time series tables that include data from a particular year:

    +

    To filter on these columns, we can use map_lgl() from +purrr. For instance, to find all time series tables that +include data from a particular year:

     # Iterate over each `years` entry, identifying whether that entry
     # contains "1840" in its `name` column.
    @@ -262,9 +263,8 @@ 

Summary metadata

#> 1 A00 Total Population    Nominal 100. <tibble> <tibble>
#> 2 A08 Persons by Sex [2]  Nominal 102. <tibble> <tibble>
#> # ℹ 1 more variable: geog_levels <list>

    -

    For more details on working with nested data frames, see the -documentation for dplyr and -purrr.

    +

    For more details on working with nested data frames, see this tidyr +article.

    Detailed metadata @@ -275,8 +275,9 @@

    Detailed metadata
     cAg_meta <- get_metadata_nhgis(dataset = "1900_cAg")

    This provides a comprehensive list of the possible specifications for -the input data source. For instance, for the 1900_cAg dataset, we have -66 tables to choose from, and 3 possible geographic levels:

    +the input data source. For instance, for the 1900_cAg +dataset, we have 66 tables to choose from, and 3 possible geographic +levels:

     cAg_meta$data_tables
     #> # A tibble: 66 × 4
    @@ -352,9 +353,9 @@ 

    Defining an IPUMS NHGIS extract

    Basic extract definitions

    Let’s say we’re interested in getting state-level data on the number -of farms and their average size from the 1900_cAg dataset that we -identified above. As we can see in the metadata, these data are -contained in tables NT2 and NT3:

    +of farms and their average size from the 1900_cAg dataset +that we identified above. As we can see in the metadata, these data are +contained in tables NT2 and NT3:

     cAg_meta$data_tables
     #> # A tibble: 66 × 4
    @@ -371,6 +372,9 @@ 

    Basic extract definitions#> 9 NT9 AYL Farms with Buildings 9 #> 10 NT10 AWT Acres of Farmland 10 #> # ℹ 56 more rows

    +
    +

    Dataset specifications +

    To request these data, we need to make an explicit dataset specification. All datasets must be associated with a selection of data tables and geographic levels. We can use the ds_spec() @@ -380,8 +384,8 @@

    Basic extract definitions
     dataset <- ds_spec(
    -  "1900_cAg", 
    -  data_tables = c("NT1", "NT2"), 
    +  "1900_cAg",
    +  data_tables = c("NT1", "NT2"),
       geog_levels = "state"
     )
     
    @@ -406,9 +410,13 @@ 

    Basic extract definitions#> Dataset: 1900_cAg #> Tables: NT1, NT2 #> Geog Levels: state

    -

    (Dataset specifications can also include selections for +

    Dataset specifications can also include selections for years and breakdown_values, but these are not -available for all datasets.)

    +available for all datasets.

    +

    +
    +

    Time series table specifications +

    Similarly, to make a request for time series tables, use the tst_spec() helper. This makes a tst_spec object containing a time series table specification.

    @@ -419,7 +427,7 @@

    Basic extract definitionsdefine_extract_nhgis( description = "Example time series table request", time_series_tables = tst_spec( - "CW3", + "CW3", geog_levels = c("county", "tract"), years = c("1990", "2000") ) @@ -430,10 +438,29 @@

    Basic extract definitions#> Time Series Table: CW3 #> Geog Levels: county, tract #> Years: 1990, 2000

    +
+
+

Shapefile specifications +

+

Shapefiles don’t have any additional specification options, and +therefore can be requested simply by providing their names:

+
+define_extract_nhgis(
+  description = "Example shapefiles request",
+  shapefiles = c("us_county_2021_tl2021", "us_county_2020_tl2020")
+)
+#> Unsubmitted IPUMS NHGIS extract 
+#> Description: Example shapefiles request
+#> 
+#> Shapefiles: us_county_2021_tl2021, us_county_2020_tl2020
+
+
+

Invalid specifications +

An attempt to define an extract that does not have all the required specifications for a given dataset or time series table will throw an error:

-
+
 define_extract_nhgis(
   description = "Invalid extract",
   datasets = ds_spec("1900_STF1", data_tables = "NP1")
@@ -442,20 +469,10 @@ 

Basic extract definitions#> ! Invalid `ds_spec` specification: #> `geog_levels` must not contain missing values.

Note that it is still possible to make invalid extract requests (for -instance, by requesting a dataset or table that doesn’t exist). This -kind of issue will be caught upon submission to the API, not upon the -creation of the extract definition.

-

Shapefiles don’t have any additional specification options, and -therefore can be requested simply by providing their names:

-
-define_extract_nhgis(
-  description = "Example shapefiles request",
-  shapefiles = c("us_county_2021_tl2021", "us_county_2020_tl2020")
-)
-#> Unsubmitted IPUMS NHGIS extract 
-#> Description: Example shapefiles request
-#> 
-#> Shapefiles: us_county_2021_tl2021, us_county_2020_tl2020
+instance, by requesting a dataset or data table that doesn’t exist). +This kind of issue will be caught upon submission to the API, not upon +the creation of the extract definition.

+

More complicated extract definitions @@ -489,8 +506,8 @@

More complicated extract definitio easier to generate the specifications independently before creating your extract request object. You can quickly create multiple ds_spec objects by iterating across the specifications you -want to include. Here, we use purrr to do so, but you could also use a -for loop:

+want to include. Here, we use purrr to do so, but you +could also use a for loop:

 ds_names <- c("2019_ACS1", "2018_ACS1")
 tables <- c("B01001", "B01002")
@@ -500,11 +517,7 @@ 

More complicated extract definitio # data tabels and geog levels indicated above datasets <- purrr::map( ds_names, - ~ ds_spec( - name = .x, - data_tables = tables, - geog_levels = geogs - ) + ~ ds_spec(name = .x, data_tables = tables, geog_levels = geogs) ) nhgis_ext <- define_extract_nhgis( diff --git a/docs/articles/ipums-api.html b/docs/articles/ipums-api.html index 80df8a5e..4699e070 100644 --- a/docs/articles/ipums-api.html +++ b/docs/articles/ipums-api.html @@ -122,12 +122,14 @@

The IPUMS API provides two asset types, both of which are supported by ipumsr:

    -
  • IPUMS extract endpoints can be used to submit +

  • +IPUMS extract endpoints can be used to submit extract requests for processing and download completed extract -files.

  • -
  • IPUMS metadata endpoints can be used to discover +files.

  • +
  • +IPUMS metadata endpoints can be used to discover and explore available IPUMS data as well as retrieve codes, names, and -other extract parameters necessary to form extract requests.

  • +other extract parameters necessary to form extract requests.

Use of the IPUMS API enables the adoption of a programmatic workflow that can help users to:

@@ -565,8 +567,8 @@

Share an extract definition

One exciting feature enabled by the IPUMS API is the ability to share a standardized extract definition with other IPUMS users so that they can create an identical extract request themselves. The terms of use for
-most IPUMS collections prohibit the redistribution of IPUMS data, but
-don't prohibit sharing data extract definitions.

+most IPUMS collections prohibit the public redistribution of IPUMS data, +but don’t prohibit the sharing of data extract definitions.

ipumsr facilitates this type of sharing with save_extract_as_json() and define_extract_from_json(), which read and write

diff --git a/docs/articles/ipums-bigdata.html b/docs/articles/ipums-bigdata.html
index f428736d..5385618c 100644
--- a/docs/articles/ipums-bigdata.html
+++ b/docs/articles/ipums-bigdata.html
@@ -121,8 +121,8 @@

Browsing for IPUMS data can be a little like grocery shopping when you’re hungry—you show up to grab a couple things, but everything looks -so good that you end up with an overflowing cart1. Unfortunately, this +so good that you end up with an overflowing cart.1 Unfortunately, this can lead to extracts so large that they don’t fit in your computer’s memory.

If you’ve got an extract that’s too big, both the IPUMS website and @@ -208,8 +208,9 @@

Select cases

#>
#> Samples: (1 total) us2013a
#> Variables: (2 total) MARST, SEX

-

If you’re using the online interface, the “Select Cases” option will -be available on the last page before submitting an extract request.

+

If you’re using the online interface, the Select +Cases option will be available on the last page before +submitting an extract request.

Use a sampled subset of the data @@ -217,11 +218,12 @@

Use a sampled subset of the data

Yet another option (also only for microdata projects) is to take a random subsample of the data before producing your extract.

Sampled data is not available via the IPUMS API, but you can use the -“Customize Sample Size” option in the online interface to do so. This -also appears on the final page before submitting an extract request.

-

If you’ve already submitted the extract, you can click the “REVISE” -link on the “Download or Revise Extracts” page to access these features -and produce a new data extract.

+Customize Sample Size option in the online interface to +do so. This also appears on the final page before submitting an extract +request.

+

If you’ve already submitted the extract, you can click the +REVISE link on the Download or Revise Extracts +page to access these features and produce a new data extract.

@@ -230,14 +232,16 @@

Option 3: Process the data in piece

ipumsr provides two related options for reading data sources in increments: read_ipums_micro_chunked(), which applies a user-supplied callback to each chunk, and read_ipums_micro_yield(), which returns records piece by piece on request.

Reading chunked data

@@ -262,10 +266,9 @@

Chunked tabulation

Imagine we wanted to find the percent of people in the workforce grouped by their self-reported health. Since our example extract is small enough to fit in memory, we could load the full dataset with
-read_ipums_micro(), relabel the EMPSTAT
-variable into a binary variable (see
-vignette("value-labels")), and count the people in each
-group.
+read_ipums_micro(), use lbl_relabel() to
+relabel the EMPSTAT variable into a binary variable, and
+count the people in each group.

 read_ipums_micro(cps_ddi_file, verbose = FALSE) %>%
   mutate(
@@ -345,11 +348,11 @@ 

Chunked tabulation

In this case, we want to row-bind the data frames returned by cb_function(), so we use IpumsDataFrameCallback.

-

-Callback objects are R6 objects, but you
-don't need to be familiar with R6 to use them2. To initialize a
-callback object, simply use $new():
+Callback objects are R6 objects, but you
+don't need to be familiar with R6 to use them.2
+To initialize a callback object, simply use $new():

 cb <- IpumsDataFrameCallback$new(cb_function)

At this point, we’re ready to load the data in chunks. We use @@ -715,15 +718,17 @@

Option 4: Use a database -
    -
  • Importing data into the database

  • -
  • Connecting the database to R

  • -
-

R has several tools that support database integration, including DBI, dbplyr, sparklyr, sparkR, bigrquery, and others. In this -example, we’ll use RSQLite to load the data into an in-memory database. -(We use RSQLite because it is easy to set up, but it is likely not -efficient enough to fully resolve issues with large IPUMS data, so it -may be wise to consider an alternative in practice.)

+
    +
  1. Importing data into the database
  2. +
  3. Connecting the database to R
  4. +
+

R has several tools that support database integration, including +DBI, dbplyr, sparklyr, +bigrquery, and others. In this example, we’ll use +RSQLite to load the data into an in-memory database. (We +use RSQLite because it is easy to set up, but it is likely not efficient +enough to fully resolve issues with large IPUMS data, so it may be wise +to consider an alternative in practice.)

Importing data into the database

@@ -733,9 +738,9 @@

Importing data into the database

chunked function to load the data into a database without needing to store the entire dataset in R.

-

(For more about rectangular vs. hierarchical extracts, see the -“Hierarchical extracts” section of -vignette("ipums-read").)

+

See the IPUMS data +reading vignette for more about rectangular vs. hierarchical +extracts.

 library(DBI)
 library(RSQLite)
@@ -830,8 +835,8 @@ 

Connecting to a database with dbpl #> 10 10 October #> 11 11 November #> 12 12 December

-

For more about variable metadata in IPUMS data, see -vignette("value-labels").

+

+See the value labels vignette for more
+about variable metadata in IPUMS data.

Reading microdata extracts @@ -292,13 +292,13 @@

Reading microdata extracts

Hierarchical extracts

-

IPUMS microdata can come in either “rectangular” or “hierarchical” -format.

+

IPUMS microdata can come in either rectangular or +hierarchical format.

Rectangular data are transformed such that every row of data represents the same type of record. For instance, each row will represent a person record, and all household-level information for that -person will be included in the same row. (This is the case for the CPS -example above.)

+person will be included in the same row. (This is the case for +cps_data shown in the example above.)

Hierarchical data have records of different types interspersed in a single file. For instance, a household record will be included in its own row followed by the person records associated with that @@ -324,11 +324,11 @@

Hierarchical extracts#> 9 H [Househ… 1962 84 3 [Mar… 1790. 27 [Min… NA NA NA #> 10 P [Person… 1962 84 NA NA NA 1 1790. 6.38e3 #> # ℹ 11,043 more rows

-

The long format consists of a single data.frame that -includes rows with varying record types. In this example, some rows have -a record type of “Household” and others have a record type of “Person”. -Variables that do not apply to a particular record type will be filled -with NA in rows of that record type.

+

The long format consists of a single tibble +that includes rows with varying record types. In this example, some rows +have a record type of “Household” and others have a record type of +“Person”. Variables that do not apply to a particular record type will +be filled with NA in rows of that record type.

To read data in list format, use read_ipums_micro_list(). This function returns a list where each element contains all the records for a given record type:
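The list-format example is cut off by the hunk boundary; a sketch using the hierarchical CPS example file shipped with ipumsr:

```r
cps_list <- read_ipums_micro_list(
  ipums_example("cps_00157.xml"),
  verbose = FALSE
)

# One element per record type, e.g. HOUSEHOLD and PERSON
names(cps_list)
```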

@@ -423,12 +423,13 @@

Reading IPUMS NHGIS extracts

#>
#> $var_desc
#> [1] "Table D6Z: Year Structure Built (Universe: Housing Units)"

-

Variable metadata for NHGIS data are slightly different than those -provided by microdata products. First, they come from a .txt codebook -file rather than an .xml DDI file. Codebooks can still be loaded into an -ipums_ddi object, but fields that do not apply to aggregate -data will be empty. In general, NHGIS codebooks provide only variable -labels and descriptions, along with citation information.

+

However, variable metadata for NHGIS data are slightly different than +those provided by microdata products. First, they come from a .txt +codebook file rather than an .xml DDI file. Codebooks can still be +loaded into an ipums_ddi object, but fields that do not +apply to aggregate data will be empty. In general, NHGIS codebooks +provide only variable labels and descriptions, along with citation +information.

-

By design, NHGIS codebooks are human-readable. To view the codebook -contents themselves without converting to an ipums_ddi -object, set raw = TRUE.

+

By design, NHGIS codebooks are human-readable, and it may be easier +to interpret their contents in raw format. To view the codebook itself +without converting to an ipums_ddi object, set +raw = TRUE.

 nhgis_cb <- read_nhgis_codebook(nhgis_ex1, raw = TRUE)
 
@@ -479,12 +481,9 @@ 

Reading IPUMS NHGIS extracts

Handling multiple files

-

In the above example, read_nhgis_codebook() was able to -identify and load the codebook file, even though the provided file path -is the same that was provided to read_nhgis() earlier. -However, for more complicated NHGIS extracts that include data from -multiple data sources, the provided .zip archive will contain multiple -codebook and data files.

+

For more complicated NHGIS extracts that include data from multiple +data sources, the provided .zip archive will contain multiple codebook +and data files.

You can view the files contained in an extract to determine if this is the case:

@@ -503,7 +502,6 @@ 

Handling multiple files
 nhgis_data2 <- read_nhgis(nhgis_ex2, file_select = contains("nation"))
-
 nhgis_data3 <- read_nhgis(nhgis_ex2, file_select = contains("ts_nominal_state"))

The matching codebook should automatically be loaded and attached to the data:

@@ -539,7 +537,7 @@

NHGIS data formats

CSV data

-

NHGIS data are most easily handled when in .csv format. +

NHGIS data are most easily handled in .csv format. read_nhgis() uses readr::read_csv() to handle the generation of column type specifications. If the guessed specifications are incorrect, you can use the col_types @@ -611,12 +609,10 @@
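The sentence is truncated at the hunk boundary, but col_types is passed through to readr; a sketch (file object and column name hypothetical):

```r
read_nhgis(
  nhgis_file,
  col_types = readr::cols(GISJOIN = readr::col_character()),
  verbose = FALSE
)
```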

Fixed-width data

#> # A00AA1940 <dbl>, A00AA1950 <dbl>, A00AA1960 <dbl>, A00AA1970 <dbl>,
#> # A00AA1980 <dbl>, A00AA1990 <dbl>, A00AA2000 <dbl>, A00AA2010 <dbl>,
#> # A00AA2020 <dbl>

-

Note that in this case numeric geographic codes are correctly loaded -as character variables. The correct parsing of NHGIS fixed-width files -is driven by the column parsing information contained in the .do file -provided in the .zip archive. This contains information not only about -column positions and data types, but also implicit decimals in the -data.

+

The correct parsing of NHGIS fixed-width files is driven by the +column parsing information contained in the .do file provided in the +.zip archive. This contains information not only about column positions +and data types, but also implicit decimals in the data.

If you no longer have access to the .do file, it is best to resubmit and/or re-download the extract (you may also consider converting to .csv format in the process). If you have moved the .do file, provide its file @@ -636,25 +632,21 @@

Reading spatial dataread_ipums_sf() to load spatial data from any of -these sources (ipumsr is phasing out support for objects from the -sp package. If you prefer to work with these objects, use -sf::as_Spatial() to convert from sf to -sp).

+these sources as an sf object from sf.

read_ipums_sf() also supports the loading of spatial files within .zip archives and the file_select syntax for -file selection (we don’t need file_select in this example -because there is only one shapefile in this example extract).

+file selection when multiple internal files are present.

 nhgis_shp_file <- ipums_example("nhgis0972_shape_small.zip")
 
@@ -677,8 +669,8 @@ 

Reading spatial data#> 6 1640 1642 21 G1640 1640 5608404797. 415671. G16421640 #> # ℹ 1 more variable: geometry <MULTIPOLYGON [m]>

These data can then be joined to associated tabular data. To preserve -IPUMS attributes from the tabular data used in the join, use -anipums_shape_*_join function:

+IPUMS attributes from the tabular data used in the join, use an +ipums_shape_*_join() function:

 joined_data <- ipums_shape_left_join(
   nhgis_data,
@@ -692,12 +684,12 @@ 

Reading spatial data#> #> $var_desc #> [1] ""

-

For NHGIS data, the join code typically corresponds to the “GISJOIN” -variable. However, for microdata projects, the variable name used for a -geographic level in the tabular data may differ from that in the spatial -data. Consult the documentation and metadata for these files to identify -the correct join columns and use the by argument to join on -these columns.

+

For NHGIS data, the join code typically corresponds to the +GISJOIN variable. However, for microdata projects, the +variable name used for a geographic level in the tabular data may differ +from that in the spatial data. Consult the documentation and metadata +for these files to identify the correct join columns and use the +by argument to join on these columns.

Once joined, data include both statistical and spatial information along with the variable metadata.

@@ -707,11 +699,11 @@

Harmonized vs. non-harmonized data< that geographic boundaries shift over time. IPUMS therefore provides multiple types of spatial data:

    -
  • Harmonized (also called “integrated” or “consistent”) files have +

  • Harmonized (also called “integrated” or “consistent”) files have been made consistent over time by combining geographies that share area -for different time periods.

  • -
  • Non-harmonized, or year-specific, files represent geographies at -a specific point in time.

  • +for different time periods. +
  • Non-harmonized, or year-specific, files represent geographies at a +specific point in time.

Furthermore, some NHGIS time series tables have been standardized such that the statistics have been adjusted to apply to a year-specific diff --git a/docs/articles/ipums.html b/docs/articles/ipums.html index ad89e620..ca80e422 100644 --- a/docs/articles/ipums.html +++ b/docs/articles/ipums.html @@ -119,9 +119,9 @@

-

This text provides an overview of how to find, request, download, and -read IPUMS data into R. For a general introduction to IPUMS and ipumsr, -see the ipumsr home +

This article provides an overview of how to find, request, download, +and read IPUMS data into R. For a general introduction to IPUMS and +ipumsr, see the ipumsr home page.

Obtaining IPUMS data @@ -143,24 +143,25 @@

Obtaining IPUMS datacertain -IPUMS projects, which also determines the functionality that ipumsr -can support.

+the IPUMS website or the IPUMS API. +ipumsr provides a set of client tools to interface with the API. Note +that only certain +IPUMS projects are currently supported by the IPUMS API.

Obtaining data via an IPUMS project website

-

To create a new extract request via an IPUMS project website, -navigate to the extract interface for the IPUMS project of interest by -clicking Select Data in the heading of the project -website. The project extract interface allows you to explore what’s -available, find documentation about data concepts and sources, and then +

To create a new extract request via an IPUMS project website (e.g. IPUMS CPS), navigate to the +extract interface for that project by clicking Select +Data in the heading of the project website.

+

+

The project’s extract interface allows you to explore what’s +available, find documentation about data concepts and sources, and specify the data you’d like to download. The data selection parameters will differ across projects; see each project’s documentation for more -details on the available options. If you’ve never created an extract for -the project you’re interested in, a good way to learn the basics is to -watch a project-specific video on creating extracts hosted on the IPUMS Tutorials +details on the available options.

+

If you’ve never created an extract for the project you’re interested +in, a good way to learn the basics is to watch a project-specific video +on creating extracts hosted on the IPUMS Tutorials page.

Downloading from microdata projects @@ -169,18 +170,18 @@

Downloading from microdata projects button to download the data file. Then, right-click the DDI link in the Codebook column, and select Save Link As… (see below).

+

Note that some browsers may display different text, but there should -be an option to download the DDI file as .xml. For instance, on Safari, -select Download Linked File As…. For ipumsr to read the -metadata, it is necessary to save the file in .xml format, +be an option to download the DDI file as .xml. (For instance, on Safari, +select Download Linked File As….) For ipumsr to read +the metadata, you must save the file in .xml format, not .html format.

-

Downloading from aggregate data projects

Aggregate data projects include data and metadata together in a -single .zip archive file. To download them, simply click on the green +single .zip archive. To download them, simply click on the green Tables button (for tabular data) and/or GIS Files button (for spatial boundary or location data) in the Download Data column.

@@ -190,27 +191,53 @@

Downloading from aggregate dat

Obtaining data via the IPUMS API

Users can also create and submit extract requests within R by using -ipumsr functions that interface with the IPUMS API. The IPUMS API -currently supports access to the extract system for the following +ipumsr functions that interface with the IPUMS API. The IPUMS API +currently supports access to the extract system for certain +IPUMS collections.

+
+

Extract support +

+

ipumsr provides an interface to the IPUMS extract system via the +IPUMS API for the following collections:

+
    +
  • IPUMS USA
  • +
  • IPUMS CPS
  • +
  • IPUMS International
  • +
  • IPUMS NHGIS
  • +
+
+
+

Metadata support +

+

ipumsr provides access to comprehensive metadata via the IPUMS API +for the following collections:

+
    +
  • IPUMS NHGIS
  • +
+

Users can query NHGIS metadata to explore available data when +specifying NHGIS extract requests.

+

A listing of available samples is provided for the following collections:

    -
  • IPUMS USA

  • -
  • IPUMS CPS

  • -
  • IPUMS International

  • -
  • IPUMS NHGIS

  • +
  • IPUMS USA
  • +
  • IPUMS CPS
  • +
  • IPUMS International
-

The IPUMS API and ipumsr also support access to IPUMS NHGIS metadata, -so users can query NHGIS metadata in R to explore what data are -available and specify NHGIS data requests. At this time, creating -requests for microdata generally requires using the corresponding -project websites to find samples and variables of interest and obtain -their identifiers for use in R extract definitions.

+

Increased access to metadata for these projects is in progress. +Currently, creating extract requests for these projects requires using +the corresponding project websites to find samples and variables of +interest and obtain their API identifiers for use in R extract +definitions.

+
+
+

Workflow +

Once you have identified the data you would like to request, the -workflow for requesting and downloading data via API is straightforward. -First, define the parameters of your extract. The available extract +workflow for requesting and downloading data via API is +straightforward.

+

First, define the parameters of your extract. The available extract definition options will differ by IPUMS data collection. See the microdata API request and NHGIS API request vignettes for more -details on defining an extract. (The NHGIS vignette also discusses how -to access NHGIS metadata.)

+details on defining an extract.

 cps_extract_request <- define_extract_cps(
   description = "2018-2019 CPS Data",
@@ -230,16 +257,17 @@ 

Obtaining data via the IPUMS API
-submitted_extract <- submit_extract(extract_request)
+submitted_extract <- submit_extract(cps_extract_request)
 downloadable_extract <- wait_for_extract(submitted_extract)
 data_files <- download_extract(downloadable_extract)

You can also get the specifications of your previous extract requests, even if they weren’t made with the API:

 past_extracts <- get_extract_history("nhgis")
-

See the introduction to the IPUMS API for R -users for more details about how to use ipumsr to interact with the -IPUMS API.

+

See the introduction to the IPUMS API +for more details about how to use ipumsr to interact with the IPUMS +API.

+

@@ -247,18 +275,23 @@

Reading IPUMS datareadr in two ways:

+functions expand on those provided in readr in two +ways:

    -
  • ipumsr anticipates standard IPUMS file structures, limiting the -need for users to manually extract and organize their downloaded files -before reading.

  • -
  • ipumsr uses an extract’s metadata files to automatically attach +

  • ipumsr anticipates standard IPUMS file structures, limiting the need +for users to manually extract and organize their downloaded files before +reading.
  • +
  • ipumsr uses an extract’s metadata files to automatically attach contextual information to the data. This allows users to easily identify -variable names, variable descriptions, and labeled data values (from haven), which are common in -IPUMS files.

  • +variable names, variable descriptions, and labeled data values (from +haven), which are common in IPUMS files.
-

For microdata files, use the read_ipums_micro_*() -family:

+

File loading is covered in depth in the reading IPUMS data vignette.

+
+

Microdata files +

+

For microdata files, use the read_ipums_micro_*() family +with the DDI (.xml) metadata file for your extract:
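The collapsed code chunk presumably reads one of the example DDI files bundled with the package:

```r
cps_ddi_file <- ipums_example("cps_00157.xml")
cps_data <- read_ipums_micro(cps_ddi_file, verbose = FALSE)
```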

+
+
+

NHGIS files +

For NHGIS files, use read_nhgis():

 nhgis_file <- ipums_example("nhgis0972_csv.zip")
-nhgis_data <- read_nhgis(nhgis_file)
-#> Use of data from NHGIS is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
-#> Rows: 71 Columns: 25
-#> ── Column specification ────────────────────────────────────────────────────────
-#> Delimiter: ","
-#> chr  (9): GISJOIN, STUSAB, CMSA, PMSA, PMSAA, AREALAND, AREAWAT, ANPSADPI, F...
-#> dbl (13): YEAR, MSA_CMSAA, INTPTLAT, INTPTLNG, PSADC, D6Z001, D6Z002, D6Z003...
-#> lgl  (3): DIVISIONA, REGIONA, STATEA
-#> 
-#>  Use `spec()` to retrieve the full column specification for this data.
-#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
+nhgis_data <- read_nhgis(nhgis_file, verbose = FALSE)
 
 head(nhgis_data)
 #> # A tibble: 6 × 25
@@ -303,6 +330,10 @@ 

Reading IPUMS data#> # FUNCSTAT <chr>, INTPTLAT <dbl>, INTPTLNG <dbl>, PSADC <dbl>, D6Z001 <dbl>, #> # D6Z002 <dbl>, D6Z003 <dbl>, D6Z004 <dbl>, D6Z005 <dbl>, D6Z006 <dbl>, #> # D6Z007 <dbl>, D6Z008 <dbl>

+
+
+

Spatial boundary files +

ipumsr also supports the reading of IPUMS shapefiles (spatial boundary and location files) into the sf format provided by the sf package:

@@ -326,15 +357,17 @@

Reading IPUMS data#> 5 0080 1692 28 G0080 0080 2401347006. 218892. G16920080 #> 6 1640 1642 21 G1640 1640 5608404797. 415671. G16421640 #> # ℹ 1 more variable: geometry <MULTIPOLYGON [m]>

+

+
+

Ancillary files +

ipumsr is primarily designed to read data produced by the IPUMS extract system. However, IPUMS does distribute other files, often available via direct download. In many cases, these can be loaded with ipumsr. Otherwise, these files can likely be handled by existing data -reading packages like readr -(for delimited files) or haven (for Stata, SPSS, or SAS -files).

-

See the vignette on reading IPUMS data -for more information.

+reading packages like readr (for delimited files) or +haven (for Stata, SPSS, or SAS files).

+

Exploring file metadata

@@ -381,6 +414,9 @@

Exploring file metadata#> 9 11 District of Columbia #> 10 12 Florida #> # ℹ 65 more rows

+
+

Labelled values +

ipumsr also provides a family of lbl_*() functions to assist in accessing and manipulating the value-level metadata included in IPUMS data. This allows for value labels to be incorporated into the @@ -416,6 +452,7 @@
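As a small illustration of the lbl_*() family (the variable is chosen arbitrarily from the CPS example data):

```r
# Drop unused value labels, then convert the remaining labels to a factor
cps$HEALTH <- as_factor(lbl_clean(cps$HEALTH))
```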

Exploring file metadata

See the value labels vignette for more details.

+ diff --git a/docs/articles/value-labels.html b/docs/articles/value-labels.html index 39e16d51..ad18fa4c 100644 --- a/docs/articles/value-labels.html +++ b/docs/articles/value-labels.html @@ -133,47 +133,44 @@

IPUMS variable metadata
  • Value labels link particular data values to more meaningful text labels. For instance, the -HEALTH variable has data values including 1 and 2, but -these are actually stand-ins for “Excellent” and “Very good” health. -This mapping would be contained in a value-label pair that includes a -value and its associated label.

  • +HEALTH variable may have data values including +1 and 2, but these are actually stand-ins for +“Excellent” and “Very good” health. This mapping would be contained in a +value-label pair that includes a value and its associated +label.

    The rest of this article will focus on value labels; for more about -variable labels and descriptions, see -vignette("ipums").

    +variable labels and descriptions, see ?ipums_var_info.

    Value labels

    -

    ipumsr uses the labelled class -from the haven -package to handle value labels.

    +

    ipumsr uses the labelled +class from the haven package to handle value labels.

    You can see this in the column data types when loading IPUMS data. -Note that <int+lbl> appears below -STATEFIP, ASECFLAG, and other variables:

    +Note that <int+lbl> appears below MONTH +and ASECFLAG:

     library(ipumsr)
     
     ddi <- read_ipums_ddi(ipums_example("cps_00160.xml"))
     cps <- read_ipums_micro(ddi, verbose = FALSE)
     
    -cps
    -#> # A tibble: 10,883 × 15
    -#>     YEAR SERIAL MONTH      CPSID ASECFLAG ASECWTH STATEFIP PERNUM  CPSIDP ASECWT
    -#>    <dbl>  <dbl> <int+lb>   <dbl> <int+lb>   <dbl> <int+lb>  <dbl>   <dbl>  <dbl>
    -#>  1  2016  24138 3 [Marc… 2.02e13 1 [ASEC]   3249. 55 [Wis…      1 2.02e13  3249.
    -#>  2  2016  24139 3 [Marc… 2.02e13 1 [ASEC]   3154. 55 [Wis…      1 2.02e13  3154.
    -#>  3  2016  24139 3 [Marc… 2.02e13 1 [ASEC]   3154. 55 [Wis…      2 2.02e13  3154.
    -#>  4  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      1 2.02e13  1652.
    -#>  5  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      2 2.02e13  1503.
    -#>  6  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      3 2.02e13  1652.
    -#>  7  2016  24141 3 [Marc… 2.02e13 1 [ASEC]   3049. 55 [Wis…      1 2.02e13  3049.
    -#>  8  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      1 2.02e13  1637.
    -#>  9  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      2 2.02e13  1637.
    -#> 10  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      3 2.02e13  1887.
    -#> # ℹ 10,873 more rows
    -#> # ℹ 5 more variables: AGE <int+lbl>, EDUC <int+lbl>, INCTOT <dbl+lbl>,
    -#> #   MIGRATE1 <int+lbl>, HEALTH <int+lbl>
    +cps[, 1:5] +#> # A tibble: 10,883 × 5 +#> YEAR SERIAL MONTH CPSID ASECFLAG +#> <dbl> <dbl> <int+lbl> <dbl> <int+lbl> +#> 1 2016 24138 3 [March] 2.02e13 1 [ASEC] +#> 2 2016 24139 3 [March] 2.02e13 1 [ASEC] +#> 3 2016 24139 3 [March] 2.02e13 1 [ASEC] +#> 4 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 5 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 6 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 7 2016 24141 3 [March] 2.02e13 1 [ASEC] +#> 8 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> 9 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> 10 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> # ℹ 10,873 more rows

    This indicates that the data contained in these columns are integers but include value labels. You can use the function is.labelled() to determine if a variable is indeed @@ -289,14 +286,16 @@

    Cautions regarding labelled

    While labelled variables provide the benefits described above, they also present challenges.

    For example, you may have noticed that both of the means -calculated above are suspect.

    -

    In the case of AGE_FACTOR, the values have been remapped -during conversion and several are inconsistent with the original -data.

    -

    In the case of AGE, we have considered all people over +calculated above are suspect:

    +
      +
    • In the case of AGE_FACTOR, the values have been +remapped during conversion and several are inconsistent with the +original data.
    • +
    • In the case of AGE, we have considered all people over 90 to be exactly 90, and all people over 99 to be exactly 99—labelled variables don’t ensure that calculations are -correct any more than factors do!

      +correct any more than factors do!
    • +

    Furthermore, many R functions ignore value labels or even actively remove them from the data:
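The examples are cut off at the hunk boundary; the general pattern is that functions returning a new vector discard the labelled class, e.g.:

```r
# ifelse() returns a bare vector; the HEALTH value labels are gone
x <- ifelse(cps$HEALTH > 3, NA, cps$HEALTH)
is.labelled(x)
#> presumably FALSE
```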

    @@ -425,10 +424,10 @@ 

Syntax for value label functions

Relabel values
      -
    • On the left-hand side, use the lbl() helper to -define a new value-label pair.

    • -
    • On the right-hand side, provide a function that returns +

    • On the left-hand side, use the lbl() helper to define a +new value-label pair.
    • +
    • On the right-hand side, provide a function that returns TRUE for those value-label pairs that should be relabelled -with the new value-label pair from the left-hand side.

    • +with the new value-label pair from the left-hand side.

    The function again uses the .val and .lbl syntax mentioned above to refer to values and @@ -728,18 +727,17 @@
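A sketch of this syntax in action (the vector, values, and labels are invented for illustration; lbl_relabel() and lbl() are the ipumsr functions described above):

x <- haven::labelled(
  c(10, 10, 11, 20, 30, 99),
  c(Yes = 10, `Yes - Logically Assigned` = 11, No = 20, Maybe = 30, NIU = 99)
)

# lbl() defines the new value-label pair; the right-hand side selects
# the existing pairs to fold into it, with .val referring to each value
lbl_relabel(x, lbl(10, "Yes (any)") ~ .val %in% c(10, 11))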

    Add new labels

    Other resources

• The haven package, which underlies ipumsr’s handling of value labels, provides more details on the labelled class. See vignette("semantics", package = "haven").
• The labelled package provides other methods for manipulating value labels, some of which overlap those provided by ipumsr.
• The questionr package includes functions for exploring labelled variables. In particular, the functions describe, freq, and lookfor all print information about a variable to the console using its value labels (see the short sketch after this list).

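For example, a quick sketch assuming questionr is installed and the cps data from above:

questionr::freq(cps$HEALTH)  # frequency table printed with HEALTH's value labels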
Finally, the foreign and prettyR packages don’t use the labelled class, but provide similar functionality for handling value labels, which could be adapted for use with IPUMS data.

diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css
deleted file mode 100644
index 5a859415..00000000
--- a/docs/bootstrap-toc.css
+++ /dev/null
(60 lines of vendored bootstrap-toc v0.4.1 CSS, omitted here)

diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js
deleted file mode 100644
index 1cdd573b..00000000
--- a/docs/bootstrap-toc.js
+++ /dev/null
(159 lines of vendored bootstrap-toc v0.4.1 JavaScript, omitted here)

diff --git a/docs/docsearch.css b/docs/docsearch.css
deleted file mode 100644
index e5f1fe1d..00000000
--- a/docs/docsearch.css
+++ /dev/null
(148 lines of vendored Algolia DocSearch CSS, omitted here)

diff --git a/docs/docsearch.js b/docs/docsearch.js
deleted file mode 100644
index b35504cd..00000000
--- a/docs/docsearch.js
+++ /dev/null
(85 lines of DocSearch keyboard-shortcut and term-highlighting JavaScript, omitted here)

diff --git a/docs/docsearch.json b/docs/docsearch.json
deleted file mode 100644
index 8ec26afd..00000000
--- a/docs/docsearch.json
+++ /dev/null
(95 lines of DocSearch crawler configuration, omitted here)

diff --git a/docs/index.html b/docs/index.html
index acd73e89..721c4528 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -142,15 +142,15 @@

Installation

What is IPUMS?

IPUMS is the world’s largest publicly available population database, providing census and survey data from around the world integrated across time and space. IPUMS integration and documentation make it easy to study change, conduct comparative research, merge information across data types, and analyze individuals within family and community context. Data and services are available free of charge.

IPUMS consists of multiple projects, or collections, that provide different data products.

• Microdata projects distribute data for individual survey units, like people or households.
• Aggregate data projects distribute summary tables of aggregate statistics for particular geographic units along with corresponding GIS mapping files.

-ipumsr supports different levels of functionality for each IPUMS project, as summarized in the following table:
+ipumsr supports different levels of functionality for each IPUMS project, as summarized in the table below.

@@ -369,19 +369,19 @@

    What is IPUMS?

-ipumsr uses the IPUMS API to submit data requests, download data extracts, and get metadata, so the scope of ipumsr functionality generally corresponds to the available API functionality. As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly.
+ipumsr uses the IPUMS API to submit data requests, download data extracts, and get metadata, so the scope of functionality generally corresponds to that available via the API. As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly.
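A sketch of that request-submit-download loop (the description, sample ID, and variables are invented for illustration; the functions are ipumsr's extract-API interface):

usa_extract <- define_extract_usa(
  description = "Example extract: age and sex, 2017 ACS",
  samples = "us2017a",
  variables = c("AGE", "SEX")
)
submitted <- submit_extract(usa_extract)
ready <- wait_for_extract(submitted)  # polls until processing completes
files <- download_extract(ready)      # downloads the data and DDI codebook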

    Getting started

-If you’re new to IPUMS data, learn more about what’s available through the IPUMS Projects Overview.
-The package vignettes are the best place to learn about what’s available in ipumsr itself:
+If you’re new to IPUMS data, learn more about what’s available through the IPUMS Projects Overview. Then, see vignette("ipums") for an overview of how to obtain IPUMS data.
+The package vignettes are the best place to explore what ipumsr has to offer:

The IPUMS support website also houses many project-specific R-based training exercises. However, note that some of these exercises may not be up to date with ipumsr’s current functionality.

    @@ -397,7 +397,7 @@