diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 5c5da50c..108dcf78 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 # Contributing
 Thank you for considering improving this project! By participating, you
-agree to abide by the [code of conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md).
+agree to abide by the [code of conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html).
 
 # Issues (Reporting a problem or suggestion)
 If you've experienced a problem with the package, or have a suggestion for it,
@@ -17,6 +17,7 @@ We'll do our best to answer your question.
 # Pull Requests (Making changes to the package)
 
 We appreciate pull requests that follow these guidelines:
+
 1) Make sure that tests pass (and add new ones if possible).
 
 2) Do your best to conform to the code style of the package, currently
diff --git a/R/api_process_extract.R b/R/api_process_extract.R
index dc43f63a..2b269713 100644
--- a/R/api_process_extract.R
+++ b/R/api_process_extract.R
@@ -766,7 +766,7 @@ extract_is_completed_and_has_links.micro_extract <- function(extract) {
 
   is_complete <- extract$status == "completed"
   has_codebook <- has_url(download_links, "ddi_codebook")
-  has_data <- has_url(download_links, "data")
+  has_data <- has_url(download_links, "data")
 
   is_complete && has_codebook && has_data
 }
diff --git a/R/micro_read_chunked.R b/R/micro_read_chunked.R
index 0fac526d..8f997597 100644
--- a/R/micro_read_chunked.R
+++ b/R/micro_read_chunked.R
@@ -168,25 +168,25 @@
 #' # the full dataset in memory
 #' if (requireNamespace("biglm")) {
 #'   lm_results <- read_ipums_micro_chunked(
-#'     ipums_example("cps_00160.xml"),
-#'     IpumsBiglmCallback$new(
-#'       INCTOT ~ AGE + HEALTH, # Model formula
-#'       function(x, pos) {
-#'         x %>%
-#'           mutate(
-#'             INCTOT = lbl_na_if(
-#'               INCTOT,
-#'               ~ grepl("Missing|N.I.U.", .lbl)
-#'             ),
-#'             HEALTH = as_factor(HEALTH)
-#'           )
-#'       }
-#'     ),
-#'     chunk_size = 1000,
-#'     verbose = FALSE
-#'   )
+#'   ipums_example("cps_00160.xml"),
+#'   IpumsBiglmCallback$new(
+#'     INCTOT ~ AGE + HEALTH, # Model formula
+#'     function(x, pos) {
+#'       x %>%
+#'         mutate(
+#'           INCTOT = lbl_na_if(
+#'             INCTOT,
+#'             ~ grepl("Missing|N.I.U.", .lbl)
+#'           ),
+#'           HEALTH = as_factor(HEALTH)
+#'         )
+#'     }
+#'   ),
+#'   chunk_size = 1000,
+#'   verbose = FALSE
+#' )
 #'
-#'   summary(lm_results)
+#' summary(lm_results)
 #' }
 read_ipums_micro_chunked <- function(
     ddi,
diff --git a/R/viewer.R b/R/viewer.R
index bf62c6d5..680e29cc 100644
--- a/R/viewer.R
+++ b/R/viewer.R
@@ -61,8 +61,10 @@ ipums_view <- function(x, out_file = NULL, launch = TRUE) {
   if (is.null(out_file)) {
     if (!launch) {
       rlang::warn(c(
-        paste0("Some operating systems may have trouble opening an HTML ",
-               "file from a temporary directory."),
+        paste0(
+          "Some operating systems may have trouble opening an HTML ",
+          "file from a temporary directory."
+        ),
         "i" = "Use `out_file` to specify an alternate output location."
       ))
     }
diff --git a/README.Rmd b/README.Rmd
index f4728d6a..5b1c4da4 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -53,7 +53,7 @@ remotes::install_github("ipums/ipumsr")
 
 ## What is IPUMS?
 
-[IPUMS](https://www.ipums.org/mission-purpose) is the world's largest
+[IPUMS](https://www.ipums.org) is the world's largest
 publicly available population database, providing census and survey
 data from around the world integrated across time and space. IPUMS
 integration and documentation make it easy to study change, conduct
@@ -61,7 +61,7 @@ comparative research, merge information across data types, and analyze
 individuals within family and community context.
Data and services are available free of charge. -IPUMS consists of multiple projects, or collections, that provide +IPUMS consists of multiple projects, or *collections*, that provide different data products. - **Microdata** projects distribute data for individual survey units, @@ -71,7 +71,7 @@ statistics for particular geographic units along with corresponding GIS mapping files. ipumsr supports different levels of functionality for each IPUMS project, as -summarized in the following table: +summarized in the table below. ```{r} #| echo: false @@ -88,90 +88,90 @@ tbl_config <- list( list( img = "", proj = "IPUMS USA", - type = "Microdata", - desc = "U.S. Census and American Community Survey microdata (1850-present)", - read = checkmark(), - request = checkmark(), + type = "Microdata", + desc = "U.S. Census and American Community Survey microdata (1850-present)", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", - proj = "IPUMS CPS", - type = "Microdata", - desc = "Current Population Survey microdata including basic monthly surveys and supplements (1962-present)", - read = checkmark(), - request = checkmark(), + proj = "IPUMS CPS", + type = "Microdata", + desc = "Current Population Survey microdata including basic monthly surveys and supplements (1962-present)", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", proj = "IPUMS International", - type = "Microdata", - desc = "Census microdata covering over 100 countries, contemporary and historical", - read = checkmark(), - request = checkmark(), + type = "Microdata", + desc = "Census microdata covering over 100 countries, contemporary and historical", + read = checkmark(), + request = checkmark(), metadata = "" ), list( img = "", - proj = "IPUMS NHGIS", - type = "Aggregate Data", - desc = "Tabular U.S. Census data and GIS mapping files (1790-present)", + proj = "IPUMS NHGIS", + type = "Aggregate Data", + desc = "Tabular U.S. Census data and GIS mapping files (1790-present)", read = checkmark(), - request = checkmark(), + request = checkmark(), metadata = checkmark() ), list( img = "", - proj = "IPUMS IHGIS", - type = "Aggregate Data", - desc = "Tabular and GIS data from population, housing, and agricultural censuses around the world", - read = "", - request = "", + proj = "IPUMS IHGIS", + type = "Aggregate Data", + desc = "Tabular and GIS data from population, housing, and agricultural censuses around the world", + read = "", + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Time Use", - type = "Microdata", - desc = "Time use microdata from the U.S. (1930-present) and thirteen other countries (1965-present)", - read = checkmark(), - request = "", + proj = "IPUMS Time Use", + type = "Microdata", + desc = "Time use microdata from the U.S. (1930-present) and thirteen other countries (1965-present)", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Health Surveys", - type = "Microdata", + proj = "IPUMS Health Surveys", + type = "Microdata", desc = paste0( "Microdata from the U.S. 
", "National Health Interview Survey (NHIS) (1963-present) and ", "Medical Expenditure Panel Survey (MEPS) (1996-present)" ), - read = checkmark(), - request = "", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Global Health", + proj = "IPUMS Global Health", type = "Microdata", desc = paste0( "Health survey microdata for low- and middle-income countries, including ", "harmonized data collections for Demographic and Health Surveys (DHS) ", "and Performance Monitoring for Action (PMA) surveys" ), - read = checkmark(), - request = "", + read = checkmark(), + request = "", metadata = "" ), list( img = "", - proj = "IPUMS Higher Ed", - type = "Microdata", - desc = "Survey microdata on the science and engineering workforce in the U.S. from 1993 to 2013", - read = checkmark(), - request = "", + proj = "IPUMS Higher Ed", + type = "Microdata", + desc = "Survey microdata on the science and engineering workforce in the U.S. from 1993 to 2013", + read = checkmark(), + request = "", metadata = "" ) ) @@ -196,25 +196,28 @@ knitr::kable( ipumsr uses the [IPUMS API](https://developer.ipums.org/) to submit data requests, download data extracts, and get metadata, so the scope of -ipumsr functionality generally corresponds to the [available API -functionality](https://developer.ipums.org/docs/v2/apiprogram/apis/). As +functionality generally corresponds to that [available via the API](https://developer.ipums.org/docs/v2/apiprogram/apis/). As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly. ## Getting started If you're new to IPUMS data, learn more about what's available through -the [IPUMS Projects Overview](https://www.ipums.org/overview). +the [IPUMS Projects Overview](https://www.ipums.org/overview). Then, see +`vignette("ipums")` for an overview of how to obtain IPUMS data. -The package vignettes are the best place to learn about what's available in -ipumsr itself: +The package vignettes are the best place to explore what ipumsr has to offer: - To read IPUMS data extracts into R, see `vignette("ipums-read")`. -- To interact with the IPUMS extract system via the IPUMS API, see - `vignette("ipums-api")`. + +- To interact with the IPUMS extract and metadata system via the IPUMS API, + see `vignette("ipums-api")`. + - For additional details about microdata and NHGIS extract requests, see `vignette("ipums-api-micro")` and `vignette("ipums-api-nhgis")`. + - To work with labelled values in IPUMS data, see `vignette("value-labels")`. + - For techniques for working with large data extracts, see `vignette("ipums-bigdata")`. @@ -243,9 +246,9 @@ We greatly appreciate feedback and development contributions. Please submit any bug reports, pull requests, or other suggestions on [GitHub](https://github.com/ipums/ipumsr/issues). Before contributing, please be sure to read the [Contributing -Guidelines](https://github.com/ipums/ipumsr/blob/master/CONTRIBUTING.md) -and the [Code of -Conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md). +Guidelines](https://tech.popdata.org/ipumsr/CONTRIBUTING.html) +and the +[Code of Conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html). If you have general questions or concerns about IPUMS data, check out our [user forum](https://forum.ipums.org) or send an email to diff --git a/README.md b/README.md index a4c61cbe..59288cba 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,15 @@ remotes::install_github("ipums/ipumsr") ## What is IPUMS? 
-[IPUMS](https://www.ipums.org/mission-purpose) is the world’s largest -publicly available population database, providing census and survey data -from around the world integrated across time and space. IPUMS -integration and documentation make it easy to study change, conduct -comparative research, merge information across data types, and analyze -individuals within family and community context. Data and services are -available free of charge. - -IPUMS consists of multiple projects, or collections, that provide +[IPUMS](https://www.ipums.org) is the world’s largest publicly available +population database, providing census and survey data from around the +world integrated across time and space. IPUMS integration and +documentation make it easy to study change, conduct comparative +research, merge information across data types, and analyze individuals +within family and community context. Data and services are available +free of charge. + +IPUMS consists of multiple projects, or *collections*, that provide different data products. - **Microdata** projects distribute data for individual survey units, @@ -60,7 +60,7 @@ different data products. GIS mapping files. ipumsr supports different levels of functionality for each IPUMS -project, as summarized in the following table: +project, as summarized in the table below. @@ -298,26 +298,31 @@ from 1993 to 2013 ipumsr uses the [IPUMS API](https://developer.ipums.org/) to submit data requests, download data extracts, and get metadata, so the scope of -ipumsr functionality generally corresponds to the [available API -functionality](https://developer.ipums.org/docs/v2/apiprogram/apis/). As -the IPUMS team extends the API to support more functionality for more -projects, we aim to extend ipumsr capabilities accordingly. +functionality generally corresponds to that [available via the +API](https://developer.ipums.org/docs/v2/apiprogram/apis/). As the IPUMS +team extends the API to support more functionality for more projects, we +aim to extend ipumsr capabilities accordingly. ## Getting started If you’re new to IPUMS data, learn more about what’s available through -the [IPUMS Projects Overview](https://www.ipums.org/overview). +the [IPUMS Projects Overview](https://www.ipums.org/overview). Then, see +`vignette("ipums")` for an overview of how to obtain IPUMS data. -The package vignettes are the best place to learn about what’s available -in ipumsr itself: +The package vignettes are the best place to explore what ipumsr has to +offer: - To read IPUMS data extracts into R, see `vignette("ipums-read")`. -- To interact with the IPUMS extract system via the IPUMS API, see - `vignette("ipums-api")`. + +- To interact with the IPUMS extract and metadata system via the IPUMS + API, see `vignette("ipums-api")`. + - For additional details about microdata and NHGIS extract requests, see `vignette("ipums-api-micro")` and `vignette("ipums-api-nhgis")`. + - To work with labelled values in IPUMS data, see `vignette("value-labels")`. + - For techniques for working with large data extracts, see `vignette("ipums-bigdata")`. @@ -346,9 +351,8 @@ We greatly appreciate feedback and development contributions. Please submit any bug reports, pull requests, or other suggestions on [GitHub](https://github.com/ipums/ipumsr/issues). Before contributing, please be sure to read the [Contributing -Guidelines](https://github.com/ipums/ipumsr/blob/master/CONTRIBUTING.md) -and the [Code of -Conduct](https://github.com/ipums/ipumsr/blob/master/CONDUCT.md). 
+Guidelines](https://tech.popdata.org/ipumsr/CONTRIBUTING.html) and the
+[Code of Conduct](https://tech.popdata.org/ipumsr/CODE_OF_CONDUCT.html).

If you have general questions or concerns about IPUMS data, check out
our [user forum](https://forum.ipums.org) or send an email to

diff --git a/docs/CONDUCT.html b/docs/CONDUCT.html
deleted file mode 100644
index d3f87ab2..00000000
--- a/docs/CONDUCT.html
+++ /dev/null
@@ -1,112 +0,0 @@
-Contributor Code of Conduct • ipumsr
-As contributors and maintainers of this project, we pledge to respect all
-people who contribute through reporting issues, posting feature requests,
-updating documentation, submitting pull requests or patches, and other
-activities.
-
-We are committed to making participation in this project a harassment-free
-experience for everyone, regardless of level of experience, gender, gender
-identity and expression, sexual orientation, disability, personal appearance,
-body size, race, ethnicity, age, or religion.
-
-Examples of unacceptable behavior by participants include the use of sexual
-language or imagery, derogatory comments or personal attacks, trolling,
-public or private harassment, insults, or other unprofessional conduct.
-
-Project maintainers have the right and responsibility to remove, edit, or
-reject comments, commits, code, wiki edits, issues, and other contributions
-that are not aligned to this Code of Conduct. Project maintainers who do not
-follow the Code of Conduct may be removed from the project team.
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by opening an issue or contacting one or more of the project
-maintainers.
-
-This Code of Conduct is adapted from the Contributor Covenant
-(http://contributor-covenant.org), version 1.0.0, available at
-http://contributor-covenant.org/version/1/0/0/
diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html
index 903aef60..08651bbe 100644
--- a/docs/CONTRIBUTING.html
+++ b/docs/CONTRIBUTING.html
@@ -84,7 +84,7 @@
-Thank you for considering improving this project! By participating, you
-agree to abide by the code of conduct.
+Thank you for considering improving this project! By participating, you
+agree to abide by the code of conduct.
Issues (Reporting a problem or suggestion)

@@ -93,8 +93,9 @@

Issues (Reporting a problem or

Pull Requests (Making changes to the package)

-

-We appreciate pull requests that follow these guidelines: 1) Make sure that tests pass (and add new ones if possible).
-
-  1. Do your best to conform to the code style of the package, currently based on the tidyverse style guide. See the styler package to easily catch stylistic errors.
-  2. Please add your name and affiliation to the NOTICE.txt file.
-  3. Summarize your changes in the NEWS.md file.
+We appreciate pull requests that follow these guidelines:
+
+  1. Make sure that tests pass (and add new ones if possible).
+  2. Do your best to conform to the code style of the package, currently based on the tidyverse style guide. See the styler package to easily catch stylistic errors (a usage sketch follows this list).
+  3. Please add your name and affiliation to the NOTICE.txt file.
+  4. Summarize your changes in the NEWS.md file.
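For the style guideline above, the styler package can report or fix style issues; a minimal sketch (the `dry` argument is available in recent styler releases):

```r
# install.packages("styler")
library(styler)

# Report which files would be restyled, without modifying them
style_pkg(dry = "on")
```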

    diff --git a/docs/articles/cps_select_data.jpg b/docs/articles/cps_select_data.jpg new file mode 100644 index 00000000..1b2c8879 Binary files /dev/null and b/docs/articles/cps_select_data.jpg differ diff --git a/docs/articles/ipums-api-micro.html b/docs/articles/ipums-api-micro.html index 5de7916b..98ff9856 100644 --- a/docs/articles/ipums-api-micro.html +++ b/docs/articles/ipums-api-micro.html @@ -144,8 +144,8 @@

Supported microdata collections

In addition to microdata projects, the IPUMS API also supports IPUMS NHGIS data. For details about obtaining IPUMS NHGIS data using ipumsr, see the NHGIS-specific vignette.

    -

-Before getting started, we'll load ipumsr and dplyr, which will be helpful for
-this demo:
+Before getting started, we'll load ipumsr and dplyr,
+which will be helpful for this demo:
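The setup chunk itself falls outside this hunk; presumably it is just the two library calls:

```r
library(ipumsr)
library(dplyr)
```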

    @@ -203,7 +203,7 @@

    IPUMS microdata metadata (forthcom
     ipumsi_samps <- get_sample_info("ipumsi")
     
    -ipumsi_samps %>% 
    +ipumsi_samps %>%
       filter(grepl("Mexico", description))
     #> # A tibble: 70 × 2
     #>    name    description       
    @@ -426,8 +426,8 @@ 

    Case selections # For detailed case selection, change the `case_selection_type` var_spec( - "RACE", - case_selections = c("811", "812"), + "RACE", + case_selections = c("811", "812"), case_selection_type = "detailed" ) #> $name @@ -478,7 +478,8 @@

    Attached characteristicsattached_characteristics argument of var_spec().

    -

    For instance, to attach the spouse’s SEX value to a record:

    +

    For instance, to attach the spouse’s SEX value to a +record:

     var_spec("SEX", attached_characteristics = "spouse")
     #> $name
    @@ -489,9 +490,9 @@ 

    Attached characteristics#> #> attr(,"class") #> [1] "var_spec" "ipums_spec" "list"

    -

    This will add a new variable (in this case, SEX_SP) to the output -data that will contain the sex of a person’s spouse (if no such record -exists, the value will be 0).

    +

    This will add a new variable (in this case, SEX_SP) to +the output data that will contain the sex of a person’s spouse (if no +such record exists, the value will be 0).

    Multiple attached characteristics can be attached for a single variable:
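The code chunk for this case is cut off by the hunk boundary; based on the var_spec() pattern above, it presumably looks something like the following (the variable and characteristics are illustrative):

```r
# Attach both the mother's and father's AGE to each person record
var_spec("AGE", attached_characteristics = c("mother", "father"))
```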

    -

-This will produce a new variable (QRACE) containing the data quality
-flag for the given variable.
+This will produce a new variable (QRACE) containing the
+data quality flag for the given variable.

    To add data quality flags for all variables that have them, set data_quality_flags = TRUE in your extract definition directly:
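The example itself is truncated here; a sketch of such a definition, assuming the extract-level data_quality_flags argument (sample ID and variables hypothetical):

```r
define_extract_cps(
  description = "Example with all available data quality flags",
  samples = "cps2022_03s",
  variables = c("AGE", "SEX", "RACE"),
  data_quality_flags = TRUE
)
```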

    diff --git a/docs/articles/ipums-api-nhgis.html b/docs/articles/ipums-api-nhgis.html index a67fe991..79716f76 100644 --- a/docs/articles/ipums-api-nhgis.html +++ b/docs/articles/ipums-api-nhgis.html @@ -188,14 +188,14 @@

Summary metadata

#> 4 1820_cPop 1820 Census Population Data [US, States & Counties]  401
#> 5 1830_cPop 1830 Census Population Data [US, States & Counties]  501
#> 6 1840_cAg  1840 Census Agriculture Data [US, States & Counties] 601

    -

    We can use basic functions from dplyr to filter the metadata to -those records of interest. For instance, if we wanted to find all the -data sources related to agriculture from the 1900 Census, we could -filter on group and description:

    +

    We can use basic functions from dplyr to filter the +metadata to those records of interest. For instance, if we wanted to +find all the data sources related to agriculture from the 1900 Census, +we could filter on group and description:

    -ds %>% 
    +ds %>%
       filter(
    -    group == "1900 Census", 
    +    group == "1900 Census",
         grepl("Agriculture", description)
       )
     #> # A tibble: 2 × 4
    @@ -249,8 +249,9 @@ 

Summary metadata

#>   <chr>  <chr>         <int>
#> 1 state  State             4
#> 2 county State--County    25

    -

    To filter on these columns, we can use map_lgl() from purrr. For instance, to find all -time series tables that include data from a particular year:

    +

    To filter on these columns, we can use map_lgl() from +purrr. For instance, to find all time series tables that +include data from a particular year:

     # Iterate over each `years` entry, identifying whether that entry
     # contains "1840" in its `name` column.
    @@ -262,9 +263,8 @@ 

Summary metadata

#> 1 A00 Total Population    Nominal 100. <tibble> <tibble>
#> 2 A08 Persons by Sex [2]  Nominal 102. <tibble> <tibble>
#> # ℹ 1 more variable: geog_levels <list>

    -

    For more details on working with nested data frames, see the -documentation for dplyr and -purrr.

    +

    For more details on working with nested data frames, see this tidyr +article.

    Detailed metadata @@ -275,8 +275,9 @@

    Detailed metadata
     cAg_meta <- get_metadata_nhgis(dataset = "1900_cAg")

    This provides a comprehensive list of the possible specifications for -the input data source. For instance, for the 1900_cAg dataset, we have -66 tables to choose from, and 3 possible geographic levels:

    +the input data source. For instance, for the 1900_cAg +dataset, we have 66 tables to choose from, and 3 possible geographic +levels:

     cAg_meta$data_tables
     #> # A tibble: 66 × 4
    @@ -352,9 +353,9 @@ 

    Defining an IPUMS NHGIS extract

    Basic extract definitions

    Let’s say we’re interested in getting state-level data on the number -of farms and their average size from the 1900_cAg dataset that we -identified above. As we can see in the metadata, these data are -contained in tables NT2 and NT3:

    +of farms and their average size from the 1900_cAg dataset +that we identified above. As we can see in the metadata, these data are +contained in tables NT2 and NT3:

     cAg_meta$data_tables
     #> # A tibble: 66 × 4
    @@ -371,6 +372,9 @@ 

    Basic extract definitions#> 9 NT9 AYL Farms with Buildings 9 #> 10 NT10 AWT Acres of Farmland 10 #> # ℹ 56 more rows

    +
    +

    Dataset specifications +

    To request these data, we need to make an explicit dataset specification. All datasets must be associated with a selection of data tables and geographic levels. We can use the ds_spec() @@ -380,8 +384,8 @@

    Basic extract definitions
     dataset <- ds_spec(
    -  "1900_cAg", 
    -  data_tables = c("NT1", "NT2"), 
    +  "1900_cAg",
    +  data_tables = c("NT1", "NT2"),
       geog_levels = "state"
     )
     
    @@ -406,9 +410,13 @@ 

    Basic extract definitions#> Dataset: 1900_cAg #> Tables: NT1, NT2 #> Geog Levels: state

    -

    (Dataset specifications can also include selections for +

    Dataset specifications can also include selections for years and breakdown_values, but these are not -available for all datasets.)

    +available for all datasets.

    +

    +
    +

    Time series table specifications +

    Similarly, to make a request for time series tables, use the tst_spec() helper. This makes a tst_spec object containing a time series table specification.

    @@ -419,7 +427,7 @@

    Basic extract definitionsdefine_extract_nhgis( description = "Example time series table request", time_series_tables = tst_spec( - "CW3", + "CW3", geog_levels = c("county", "tract"), years = c("1990", "2000") ) @@ -430,10 +438,29 @@

    Basic extract definitions#> Time Series Table: CW3 #> Geog Levels: county, tract #> Years: 1990, 2000

    +
+
+

Shapefile specifications +

+

Shapefiles don’t have any additional specification options, and +therefore can be requested simply by providing their names:

+
+define_extract_nhgis(
+  description = "Example shapefiles request",
+  shapefiles = c("us_county_2021_tl2021", "us_county_2020_tl2020")
+)
+#> Unsubmitted IPUMS NHGIS extract 
+#> Description: Example shapefiles request
+#> 
+#> Shapefiles: us_county_2021_tl2021, us_county_2020_tl2020
+
+
+

Invalid specifications +

An attempt to define an extract that does not have all the required specifications for a given dataset or time series table will throw an error:

-
+
 define_extract_nhgis(
   description = "Invalid extract",
   datasets = ds_spec("1900_STF1", data_tables = "NP1")
@@ -442,20 +469,10 @@ 

Basic extract definitions#> ! Invalid `ds_spec` specification: #> `geog_levels` must not contain missing values.

Note that it is still possible to make invalid extract requests (for -instance, by requesting a dataset or table that doesn’t exist). This -kind of issue will be caught upon submission to the API, not upon the -creation of the extract definition.

-

Shapefiles don’t have any additional specification options, and -therefore can be requested simply by providing their names:

-
-define_extract_nhgis(
-  description = "Example shapefiles request",
-  shapefiles = c("us_county_2021_tl2021", "us_county_2020_tl2020")
-)
-#> Unsubmitted IPUMS NHGIS extract 
-#> Description: Example shapefiles request
-#> 
-#> Shapefiles: us_county_2021_tl2021, us_county_2020_tl2020
+instance, by requesting a dataset or data table that doesn’t exist). +This kind of issue will be caught upon submission to the API, not upon +the creation of the extract definition.

+

More complicated extract definitions @@ -489,8 +506,8 @@

More complicated extract definitio easier to generate the specifications independently before creating your extract request object. You can quickly create multiple ds_spec objects by iterating across the specifications you -want to include. Here, we use purrr to do so, but you could also use a -for loop:

+want to include. Here, we use purrr to do so, but you +could also use a for loop:

 ds_names <- c("2019_ACS1", "2018_ACS1")
 tables <- c("B01001", "B01002")
@@ -500,11 +517,7 @@ 

More complicated extract definitio # data tabels and geog levels indicated above datasets <- purrr::map( ds_names, - ~ ds_spec( - name = .x, - data_tables = tables, - geog_levels = geogs - ) + ~ ds_spec(name = .x, data_tables = tables, geog_levels = geogs) ) nhgis_ext <- define_extract_nhgis( diff --git a/docs/articles/ipums-api.html b/docs/articles/ipums-api.html index 80df8a5e..4699e070 100644 --- a/docs/articles/ipums-api.html +++ b/docs/articles/ipums-api.html @@ -122,12 +122,14 @@

The IPUMS API provides two asset types, both of which are supported by ipumsr:

    -
  • IPUMS extract endpoints can be used to submit +

  • +IPUMS extract endpoints can be used to submit extract requests for processing and download completed extract -files.

  • -
  • IPUMS metadata endpoints can be used to discover +files.

  • +
  • +IPUMS metadata endpoints can be used to discover and explore available IPUMS data as well as retrieve codes, names, and -other extract parameters necessary to form extract requests.

  • +other extract parameters necessary to form extract requests.

Use of the IPUMS API enables the adoption of a programmatic workflow that can help users to:

@@ -565,8 +567,8 @@

Share an extract definition

One exciting feature enabled by the IPUMS API is the ability to share a standardized extract definition with other IPUMS users so that they can create an identical extract request themselves. The terms of use for
-most IPUMS collections prohibit the redistribution of IPUMS data, but
-don't prohibit sharing data extract definitions.

+most IPUMS collections prohibit the public redistribution of IPUMS data, +but don’t prohibit the sharing of data extract definitions.

ipumsr facilitates this type of sharing with save_extract_as_json() and define_extract_from_json(), which read and write

diff --git a/docs/articles/ipums-bigdata.html b/docs/articles/ipums-bigdata.html
index f428736d..5385618c 100644
--- a/docs/articles/ipums-bigdata.html
+++ b/docs/articles/ipums-bigdata.html
@@ -121,8 +121,8 @@

Browsing for IPUMS data can be a little like grocery shopping when you’re hungry—you show up to grab a couple things, but everything looks -so good that you end up with an overflowing cart1. Unfortunately, this +so good that you end up with an overflowing cart.1 Unfortunately, this can lead to extracts so large that they don’t fit in your computer’s memory.

If you’ve got an extract that’s too big, both the IPUMS website and @@ -208,8 +208,9 @@

Select cases

#>
#> Samples: (1 total) us2013a
#> Variables: (2 total) MARST, SEX

-

If you’re using the online interface, the “Select Cases” option will -be available on the last page before submitting an extract request.

+

If you’re using the online interface, the Select +Cases option will be available on the last page before +submitting an extract request.

Use a sampled subset of the data @@ -217,11 +218,12 @@

Use a sampled subset of the data

Yet another option (also only for microdata projects) is to take a random subsample of the data before producing your extract.

Sampled data is not available via the IPUMS API, but you can use the -“Customize Sample Size” option in the online interface to do so. This -also appears on the final page before submitting an extract request.

-

If you’ve already submitted the extract, you can click the “REVISE” -link on the “Download or Revise Extracts” page to access these features -and produce a new data extract.

+Customize Sample Size option in the online interface to +do so. This also appears on the final page before submitting an extract +request.

+

If you’ve already submitted the extract, you can click the +REVISE link on the Download or Revise Extracts +page to access these features and produce a new data extract.

@@ -230,14 +232,16 @@

Option 3: Process the data in piece

ipumsr provides two related options for reading data sources in increments: read_ipums_micro_chunked(), which applies a user-supplied callback to each chunk, and read_ipums_micro_yield(), which returns records piece by piece on request.

Reading chunked data

@@ -262,10 +266,9 @@

Chunked tabulation

Imagine we wanted to find the percent of people in the workforce grouped by their self-reported health. Since our example extract is small enough to fit in memory, we could load the full dataset with
-read_ipums_micro(), relabel the EMPSTAT
-variable into a binary variable (see
-vignette("value-labels")), and count the people in each
-group.
+read_ipums_micro(), use lbl_relabel() to
+relabel the EMPSTAT variable into a binary variable, and
+count the people in each group.

 read_ipums_micro(cps_ddi_file, verbose = FALSE) %>%
   mutate(
@@ -345,11 +348,11 @@ 

Chunked tabulation

In this case, we want to row-bind the data frames returned by cb_function(), so we use IpumsDataFrameCallback.

-

-Callback objects are R6 objects, but you
-don't need to be familiar with R6 to use them2. To initialize a
-callback object, simply use $new():
+Callback objects are R6 objects, but you
+don't need to be familiar with R6 to use them.2
+To initialize a callback object, simply use $new():

 cb <- IpumsDataFrameCallback$new(cb_function)

At this point, we’re ready to load the data in chunks. We use @@ -715,15 +718,17 @@

Option 4: Use a database -
    -
  • Importing data into the database

  • -
  • Connecting the database to R

  • -
-

R has several tools that support database integration, including DBI, dbplyr, sparklyr, sparkR, bigrquery, and others. In this -example, we’ll use RSQLite to load the data into an in-memory database. -(We use RSQLite because it is easy to set up, but it is likely not -efficient enough to fully resolve issues with large IPUMS data, so it -may be wise to consider an alternative in practice.)

+
    +
  1. Importing data into the database
  2. +
  3. Connecting the database to R
  4. +
+

R has several tools that support database integration, including +DBI, dbplyr, sparklyr, +bigrquery, and others. In this example, we’ll use +RSQLite to load the data into an in-memory database. (We +use RSQLite because it is easy to set up, but it is likely not efficient +enough to fully resolve issues with large IPUMS data, so it may be wise +to consider an alternative in practice.)

Importing data into the database

@@ -733,9 +738,9 @@

Importing data into the database

chunked function to load the data into a database without needing to store the entire dataset in R.

-

(For more about rectangular vs. hierarchical extracts, see the -“Hierarchical extracts” section of -vignette("ipums-read").)

+

See the IPUMS data +reading vignette for more about rectangular vs. hierarchical +extracts.

 library(DBI)
 library(RSQLite)
@@ -830,8 +835,8 @@ 

Connecting to a database with dbpl #> 10 10 October #> 11 11 November #> 12 12 December

-

For more about variable metadata in IPUMS data, see -vignette("value-labels").

+

+See the value labels vignette for more
+about variable metadata in IPUMS data.

Reading microdata extracts @@ -292,13 +292,13 @@

Reading microdata extracts

Hierarchical extracts

-

IPUMS microdata can come in either “rectangular” or “hierarchical” -format.

+

IPUMS microdata can come in either rectangular or +hierarchical format.

Rectangular data are transformed such that every row of data represents the same type of record. For instance, each row will represent a person record, and all household-level information for that -person will be included in the same row. (This is the case for the CPS -example above.)

+person will be included in the same row. (This is the case for +cps_data shown in the example above.)

Hierarchical data have records of different types interspersed in a single file. For instance, a household record will be included in its own row followed by the person records associated with that @@ -324,11 +324,11 @@

Hierarchical extracts#> 9 H [Househ… 1962 84 3 [Mar… 1790. 27 [Min… NA NA NA #> 10 P [Person… 1962 84 NA NA NA 1 1790. 6.38e3 #> # ℹ 11,043 more rows

-

The long format consists of a single data.frame that -includes rows with varying record types. In this example, some rows have -a record type of “Household” and others have a record type of “Person”. -Variables that do not apply to a particular record type will be filled -with NA in rows of that record type.

+

The long format consists of a single tibble +that includes rows with varying record types. In this example, some rows +have a record type of “Household” and others have a record type of +“Person”. Variables that do not apply to a particular record type will +be filled with NA in rows of that record type.

To read data in list format, use read_ipums_micro_list(). This function returns a list where each element contains all the records for a given record type:
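The list-format example is cut off by the hunk boundary; a sketch using the hierarchical CPS example file shipped with ipumsr:

```r
cps_list <- read_ipums_micro_list(
  ipums_example("cps_00157.xml"),
  verbose = FALSE
)

# One element per record type, e.g. HOUSEHOLD and PERSON
names(cps_list)
```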

@@ -423,12 +423,13 @@

Reading IPUMS NHGIS extracts

#>
#> $var_desc
#> [1] "Table D6Z: Year Structure Built (Universe: Housing Units)"

-

Variable metadata for NHGIS data are slightly different than those -provided by microdata products. First, they come from a .txt codebook -file rather than an .xml DDI file. Codebooks can still be loaded into an -ipums_ddi object, but fields that do not apply to aggregate -data will be empty. In general, NHGIS codebooks provide only variable -labels and descriptions, along with citation information.

+

However, variable metadata for NHGIS data are slightly different than +those provided by microdata products. First, they come from a .txt +codebook file rather than an .xml DDI file. Codebooks can still be +loaded into an ipums_ddi object, but fields that do not +apply to aggregate data will be empty. In general, NHGIS codebooks +provide only variable labels and descriptions, along with citation +information.

-

By design, NHGIS codebooks are human-readable. To view the codebook -contents themselves without converting to an ipums_ddi -object, set raw = TRUE.

+

By design, NHGIS codebooks are human-readable, and it may be easier +to interpret their contents in raw format. To view the codebook itself +without converting to an ipums_ddi object, set +raw = TRUE.

 nhgis_cb <- read_nhgis_codebook(nhgis_ex1, raw = TRUE)
 
@@ -479,12 +481,9 @@ 

Reading IPUMS NHGIS extracts

Handling multiple files

-

In the above example, read_nhgis_codebook() was able to -identify and load the codebook file, even though the provided file path -is the same that was provided to read_nhgis() earlier. -However, for more complicated NHGIS extracts that include data from -multiple data sources, the provided .zip archive will contain multiple -codebook and data files.

+

For more complicated NHGIS extracts that include data from multiple +data sources, the provided .zip archive will contain multiple codebook +and data files.

You can view the files contained in an extract to determine if this is the case:

@@ -503,7 +502,6 @@ 

Handling multiple files
 nhgis_data2 <- read_nhgis(nhgis_ex2, file_select = contains("nation"))
-
 nhgis_data3 <- read_nhgis(nhgis_ex2, file_select = contains("ts_nominal_state"))

The matching codebook should automatically be loaded and attached to the data:

@@ -539,7 +537,7 @@

NHGIS data formats

CSV data

-

NHGIS data are most easily handled when in .csv format. +

NHGIS data are most easily handled in .csv format. read_nhgis() uses readr::read_csv() to handle the generation of column type specifications. If the guessed specifications are incorrect, you can use the col_types @@ -611,12 +609,10 @@
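The sentence is truncated at the hunk boundary, but col_types is passed through to readr; a sketch (file object and column name hypothetical):

```r
read_nhgis(
  nhgis_file,
  col_types = readr::cols(GISJOIN = readr::col_character()),
  verbose = FALSE
)
```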

Fixed-width data

#> # A00AA1940 <dbl>, A00AA1950 <dbl>, A00AA1960 <dbl>, A00AA1970 <dbl>,
#> # A00AA1980 <dbl>, A00AA1990 <dbl>, A00AA2000 <dbl>, A00AA2010 <dbl>,
#> # A00AA2020 <dbl>

-

Note that in this case numeric geographic codes are correctly loaded -as character variables. The correct parsing of NHGIS fixed-width files -is driven by the column parsing information contained in the .do file -provided in the .zip archive. This contains information not only about -column positions and data types, but also implicit decimals in the -data.

+

The correct parsing of NHGIS fixed-width files is driven by the +column parsing information contained in the .do file provided in the +.zip archive. This contains information not only about column positions +and data types, but also implicit decimals in the data.

If you no longer have access to the .do file, it is best to resubmit and/or re-download the extract (you may also consider converting to .csv format in the process). If you have moved the .do file, provide its file @@ -636,25 +632,21 @@

Reading spatial dataread_ipums_sf() to load spatial data from any of -these sources (ipumsr is phasing out support for objects from the -sp package. If you prefer to work with these objects, use -sf::as_Spatial() to convert from sf to -sp).

+these sources as an sf object from sf.

read_ipums_sf() also supports the loading of spatial files within .zip archives and the file_select syntax for -file selection (we don’t need file_select in this example -because there is only one shapefile in this example extract).

+file selection when multiple internal files are present.

 nhgis_shp_file <- ipums_example("nhgis0972_shape_small.zip")
 
@@ -677,8 +669,8 @@ 

Reading spatial data#> 6 1640 1642 21 G1640 1640 5608404797. 415671. G16421640 #> # ℹ 1 more variable: geometry <MULTIPOLYGON [m]>

These data can then be joined to associated tabular data. To preserve -IPUMS attributes from the tabular data used in the join, use -anipums_shape_*_join function:

+IPUMS attributes from the tabular data used in the join, use an +ipums_shape_*_join() function:

 joined_data <- ipums_shape_left_join(
   nhgis_data,
@@ -692,12 +684,12 @@ 

Reading spatial data#> #> $var_desc #> [1] ""

-

For NHGIS data, the join code typically corresponds to the “GISJOIN” -variable. However, for microdata projects, the variable name used for a -geographic level in the tabular data may differ from that in the spatial -data. Consult the documentation and metadata for these files to identify -the correct join columns and use the by argument to join on -these columns.

+

For NHGIS data, the join code typically corresponds to the +GISJOIN variable. However, for microdata projects, the +variable name used for a geographic level in the tabular data may differ +from that in the spatial data. Consult the documentation and metadata +for these files to identify the correct join columns and use the +by argument to join on these columns.

Once joined, data include both statistical and spatial information along with the variable metadata.

@@ -707,11 +699,11 @@

Harmonized vs. non-harmonized data< that geographic boundaries shift over time. IPUMS therefore provides multiple types of spatial data:

    -
  • Harmonized (also called “integrated” or “consistent”) files have +

  • Harmonized (also called “integrated” or “consistent”) files have been made consistent over time by combining geographies that share area -for different time periods.

  • -
  • Non-harmonized, or year-specific, files represent geographies at -a specific point in time.

  • +for different time periods. +
  • Non-harmonized, or year-specific, files represent geographies at a +specific point in time.

Furthermore, some NHGIS time series tables have been standardized such that the statistics have been adjusted to apply to a year-specific diff --git a/docs/articles/ipums.html b/docs/articles/ipums.html index ad89e620..ca80e422 100644 --- a/docs/articles/ipums.html +++ b/docs/articles/ipums.html @@ -119,9 +119,9 @@

-

This text provides an overview of how to find, request, download, and -read IPUMS data into R. For a general introduction to IPUMS and ipumsr, -see the ipumsr home +

This article provides an overview of how to find, request, download, +and read IPUMS data into R. For a general introduction to IPUMS and +ipumsr, see the ipumsr home page.

Obtaining IPUMS data @@ -143,24 +143,25 @@

Obtaining IPUMS datacertain -IPUMS projects, which also determines the functionality that ipumsr -can support.

+the IPUMS website or the IPUMS API. +ipumsr provides a set of client tools to interface with the API. Note +that only certain +IPUMS projects are currently supported by the IPUMS API.

Obtaining data via an IPUMS project website

-

To create a new extract request via an IPUMS project website, -navigate to the extract interface for the IPUMS project of interest by -clicking Select Data in the heading of the project -website. The project extract interface allows you to explore what’s -available, find documentation about data concepts and sources, and then +

To create a new extract request via an IPUMS project website (e.g. IPUMS CPS), navigate to the +extract interface for that project by clicking Select +Data in the heading of the project website.

+

+

The project’s extract interface allows you to explore what’s +available, find documentation about data concepts and sources, and specify the data you’d like to download. The data selection parameters will differ across projects; see each project’s documentation for more -details on the available options. If you’ve never created an extract for -the project you’re interested in, a good way to learn the basics is to -watch a project-specific video on creating extracts hosted on the IPUMS Tutorials +details on the available options.

+

If you’ve never created an extract for the project you’re interested +in, a good way to learn the basics is to watch a project-specific video +on creating extracts hosted on the IPUMS Tutorials page.

Downloading from microdata projects @@ -169,18 +170,18 @@

Downloading from microdata projects button to download the data file. Then, right-click the DDI link in the Codebook column, and select Save Link As… (see below).

+

Note that some browsers may display different text, but there should -be an option to download the DDI file as .xml. For instance, on Safari, -select Download Linked File As…. For ipumsr to read the -metadata, it is necessary to save the file in .xml format, +be an option to download the DDI file as .xml. (For instance, on Safari, +select Download Linked File As….) For ipumsr to read +the metadata, you must save the file in .xml format, not .html format.

-

Downloading from aggregate data projects

Aggregate data projects include data and metadata together in a -single .zip archive file. To download them, simply click on the green +single .zip archive. To download them, simply click on the green Tables button (for tabular data) and/or GIS Files button (for spatial boundary or location data) in the Download Data column.

@@ -190,27 +191,53 @@

Downloading from aggregate dat

Obtaining data via the IPUMS API

Users can also create and submit extract requests within R by using -ipumsr functions that interface with the IPUMS API. The IPUMS API -currently supports access to the extract system for the following +ipumsr functions that interface with the IPUMS API. The IPUMS API +currently supports access to the extract system for certain +IPUMS collections.

+
+

Extract support +

+

ipumsr provides an interface to the IPUMS extract system via the +IPUMS API for the following collections:

+
    +
  • IPUMS USA
  • +
  • IPUMS CPS
  • +
  • IPUMS International
  • +
  • IPUMS NHGIS
  • +
+
+
+

Metadata support +

+

ipumsr provides access to comprehensive metadata via the IPUMS API +for the following collections:

+
    +
  • IPUMS NHGIS
  • +
+

Users can query NHGIS metadata to explore available data when +specifying NHGIS extract requests.

+

A listing of available samples is provided for the following collections:

    -
  • IPUMS USA

  • -
  • IPUMS CPS

  • -
  • IPUMS International

  • -
  • IPUMS NHGIS

  • +
  • IPUMS USA
  • +
  • IPUMS CPS
  • +
  • IPUMS International
-

The IPUMS API and ipumsr also support access to IPUMS NHGIS metadata, -so users can query NHGIS metadata in R to explore what data are -available and specify NHGIS data requests. At this time, creating -requests for microdata generally requires using the corresponding -project websites to find samples and variables of interest and obtain -their identifiers for use in R extract definitions.

+

Increased access to metadata for these projects is in progress. +Currently, creating extract requests for these projects requires using +the corresponding project websites to find samples and variables of +interest and obtain their API identifiers for use in R extract +definitions.

+
+
+

Workflow +

Once you have identified the data you would like to request, the -workflow for requesting and downloading data via API is straightforward. -First, define the parameters of your extract. The available extract +workflow for requesting and downloading data via API is +straightforward.

+

First, define the parameters of your extract. The available extract definition options will differ by IPUMS data collection. See the microdata API request and NHGIS API request vignettes for more -details on defining an extract. (The NHGIS vignette also discusses how -to access NHGIS metadata.)

+details on defining an extract.

 cps_extract_request <- define_extract_cps(
   description = "2018-2019 CPS Data",
@@ -230,16 +257,17 @@ 

Obtaining data via the IPUMS API
-submitted_extract <- submit_extract(extract_request)
+submitted_extract <- submit_extract(cps_extract_request)
 downloadable_extract <- wait_for_extract(submitted_extract)
 data_files <- download_extract(downloadable_extract)

You can also get the specifications of your previous extract requests, even if they weren’t made with the API:

 past_extracts <- get_extract_history("nhgis")
-

See the introduction to the IPUMS API for R -users for more details about how to use ipumsr to interact with the -IPUMS API.

+

See the introduction to the IPUMS API +for more details about how to use ipumsr to interact with the IPUMS +API.

+

@@ -247,18 +275,23 @@

Reading IPUMS datareadr in two ways:

+functions expand on those provided in readr in two +ways:

    -
  • ipumsr anticipates standard IPUMS file structures, limiting the -need for users to manually extract and organize their downloaded files -before reading.

  • -
  • ipumsr uses an extract’s metadata files to automatically attach +

  • ipumsr anticipates standard IPUMS file structures, limiting the need +for users to manually extract and organize their downloaded files before +reading.
  • +
  • ipumsr uses an extract’s metadata files to automatically attach contextual information to the data. This allows users to easily identify -variable names, variable descriptions, and labeled data values (from haven), which are common in -IPUMS files.

  • +variable names, variable descriptions, and labeled data values (from +haven), which are common in IPUMS files.
-

For microdata files, use the read_ipums_micro_*() -family:

+

File loading is covered in depth in the reading IPUMS data vignette.

+
+

Microdata files +

+

For microdata files, use the read_ipums_micro_*() family +with the DDI (.xml) metadata file for your extract:
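The collapsed code chunk presumably reads one of the example DDI files bundled with the package:

```r
cps_ddi_file <- ipums_example("cps_00157.xml")
cps_data <- read_ipums_micro(cps_ddi_file, verbose = FALSE)
```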

+
+
+

NHGIS files +

For NHGIS files, use read_nhgis():

 nhgis_file <- ipums_example("nhgis0972_csv.zip")
-nhgis_data <- read_nhgis(nhgis_file)
-#> Use of data from NHGIS is subject to conditions including that users should cite the data appropriately. Use command `ipums_conditions()` for more details.
-#> Rows: 71 Columns: 25
-#> ── Column specification ────────────────────────────────────────────────────────
-#> Delimiter: ","
-#> chr  (9): GISJOIN, STUSAB, CMSA, PMSA, PMSAA, AREALAND, AREAWAT, ANPSADPI, F...
-#> dbl (13): YEAR, MSA_CMSAA, INTPTLAT, INTPTLNG, PSADC, D6Z001, D6Z002, D6Z003...
-#> lgl  (3): DIVISIONA, REGIONA, STATEA
-#> 
-#>  Use `spec()` to retrieve the full column specification for this data.
-#>  Specify the column types or set `show_col_types = FALSE` to quiet this message.
+nhgis_data <- read_nhgis(nhgis_file, verbose = FALSE)
 
 head(nhgis_data)
 #> # A tibble: 6 × 25
@@ -303,6 +330,10 @@ 

Reading IPUMS data#> # FUNCSTAT <chr>, INTPTLAT <dbl>, INTPTLNG <dbl>, PSADC <dbl>, D6Z001 <dbl>, #> # D6Z002 <dbl>, D6Z003 <dbl>, D6Z004 <dbl>, D6Z005 <dbl>, D6Z006 <dbl>, #> # D6Z007 <dbl>, D6Z008 <dbl>

+
+
+

Spatial boundary files +

ipumsr also supports the reading of IPUMS shapefiles (spatial boundary and location files) into the sf format provided by the sf package:

@@ -326,15 +357,17 @@

Reading IPUMS data#> 5 0080 1692 28 G0080 0080 2401347006. 218892. G16920080 #> 6 1640 1642 21 G1640 1640 5608404797. 415671. G16421640 #> # ℹ 1 more variable: geometry <MULTIPOLYGON [m]>

+

+
+

Ancillary files +

ipumsr is primarily designed to read data produced by the IPUMS extract system. However, IPUMS does distribute other files, often available via direct download. In many cases, these can be loaded with ipumsr. Otherwise, these files can likely be handled by existing data -reading packages like readr -(for delimited files) or haven (for Stata, SPSS, or SAS -files).

-

See the vignette on reading IPUMS data -for more information.

+reading packages like readr (for delimited files) or +haven (for Stata, SPSS, or SAS files).

+

Exploring file metadata

@@ -381,6 +414,9 @@

Exploring file metadata#> 9 11 District of Columbia #> 10 12 Florida #> # ℹ 65 more rows

+
+

Labelled values +

ipumsr also provides a family of lbl_*() functions to assist in accessing and manipulating the value-level metadata included in IPUMS data. This allows for value labels to be incorporated into the @@ -416,6 +452,7 @@
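As a small illustration of the lbl_*() family (the variable is chosen arbitrarily from the CPS example data):

```r
# Drop unused value labels, then convert the remaining labels to a factor
cps$HEALTH <- as_factor(lbl_clean(cps$HEALTH))
```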

Exploring file metadata

See the value labels vignette for more details.

+ diff --git a/docs/articles/value-labels.html b/docs/articles/value-labels.html index 39e16d51..ad18fa4c 100644 --- a/docs/articles/value-labels.html +++ b/docs/articles/value-labels.html @@ -133,47 +133,44 @@

IPUMS variable metadata
  • Value labels link particular data values to more meaningful text labels. For instance, the -HEALTH variable has data values including 1 and 2, but -these are actually stand-ins for “Excellent” and “Very good” health. -This mapping would be contained in a value-label pair that includes a -value and its associated label.

  • +HEALTH variable may have data values including +1 and 2, but these are actually stand-ins for +“Excellent” and “Very good” health. This mapping would be contained in a +value-label pair that includes a value and its associated +label.

    The rest of this article will focus on value labels; for more about -variable labels and descriptions, see -vignette("ipums").

    +variable labels and descriptions, see ?ipums_var_info.

    Value labels

    -

    ipumsr uses the labelled class -from the haven -package to handle value labels.

    +

    ipumsr uses the labelled +class from the haven package to handle value labels.

    You can see this in the column data types when loading IPUMS data. -Note that <int+lbl> appears below -STATEFIP, ASECFLAG, and other variables:

    +Note that <int+lbl> appears below MONTH +and ASECFLAG:

     library(ipumsr)
     
     ddi <- read_ipums_ddi(ipums_example("cps_00160.xml"))
     cps <- read_ipums_micro(ddi, verbose = FALSE)
     
    -cps
    -#> # A tibble: 10,883 × 15
    -#>     YEAR SERIAL MONTH      CPSID ASECFLAG ASECWTH STATEFIP PERNUM  CPSIDP ASECWT
    -#>    <dbl>  <dbl> <int+lb>   <dbl> <int+lb>   <dbl> <int+lb>  <dbl>   <dbl>  <dbl>
    -#>  1  2016  24138 3 [Marc… 2.02e13 1 [ASEC]   3249. 55 [Wis…      1 2.02e13  3249.
    -#>  2  2016  24139 3 [Marc… 2.02e13 1 [ASEC]   3154. 55 [Wis…      1 2.02e13  3154.
    -#>  3  2016  24139 3 [Marc… 2.02e13 1 [ASEC]   3154. 55 [Wis…      2 2.02e13  3154.
    -#>  4  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      1 2.02e13  1652.
    -#>  5  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      2 2.02e13  1503.
    -#>  6  2016  24140 3 [Marc… 2.02e13 1 [ASEC]   1652. 55 [Wis…      3 2.02e13  1652.
    -#>  7  2016  24141 3 [Marc… 2.02e13 1 [ASEC]   3049. 55 [Wis…      1 2.02e13  3049.
    -#>  8  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      1 2.02e13  1637.
    -#>  9  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      2 2.02e13  1637.
    -#> 10  2016  24142 3 [Marc… 2.02e13 1 [ASEC]   1637. 55 [Wis…      3 2.02e13  1887.
    -#> # ℹ 10,873 more rows
    -#> # ℹ 5 more variables: AGE <int+lbl>, EDUC <int+lbl>, INCTOT <dbl+lbl>,
    -#> #   MIGRATE1 <int+lbl>, HEALTH <int+lbl>
    +cps[, 1:5] +#> # A tibble: 10,883 × 5 +#> YEAR SERIAL MONTH CPSID ASECFLAG +#> <dbl> <dbl> <int+lbl> <dbl> <int+lbl> +#> 1 2016 24138 3 [March] 2.02e13 1 [ASEC] +#> 2 2016 24139 3 [March] 2.02e13 1 [ASEC] +#> 3 2016 24139 3 [March] 2.02e13 1 [ASEC] +#> 4 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 5 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 6 2016 24140 3 [March] 2.02e13 1 [ASEC] +#> 7 2016 24141 3 [March] 2.02e13 1 [ASEC] +#> 8 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> 9 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> 10 2016 24142 3 [March] 2.02e13 1 [ASEC] +#> # ℹ 10,873 more rows

    This indicates that the data contained in these columns are integers but include value labels. You can use the function is.labelled() to determine if a variable is indeed @@ -289,14 +286,16 @@

    Cautions regarding labelled

    While labelled variables provide the benefits described above, they also present challenges.

    For example, you may have noticed that both of the means -calculated above are suspect.

    -

    In the case of AGE_FACTOR, the values have been remapped -during conversion and several are inconsistent with the original -data.

    -

    In the case of AGE, we have considered all people over +calculated above are suspect:

    +
      +
    • In the case of AGE_FACTOR, the values have been +remapped during conversion and several are inconsistent with the +original data.
    • +
    • In the case of AGE, we have considered all people over 90 to be exactly 90, and all people over 99 to be exactly 99—labelled variables don’t ensure that calculations are -correct any more than factors do!

      +correct any more than factors do!
    • +

    Furthermore, many R functions ignore value labels or even actively remove them from the data:
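The examples are cut off at the hunk boundary; the general pattern is that functions returning a new vector discard the labelled class, e.g.:

```r
# ifelse() returns a bare vector; the HEALTH value labels are gone
x <- ifelse(cps$HEALTH > 3, NA, cps$HEALTH)
is.labelled(x)
#> presumably FALSE
```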

    @@ -425,10 +424,10 @@ 

Syntax for value label functions

Relabel values
      -
    • On the left-hand side, use the lbl() helper to -define a new value-label pair.

    • -
    • On the right-hand side, provide a function that returns +

    • On the left-hand side, use the lbl() helper to define a +new value-label pair.
    • +
    • On the right-hand side, provide a function that returns TRUE for those value-label pairs that should be relabelled -with the new value-label pair from the left-hand side.

    • +with the new value-label pair from the left-hand side.

    The function again uses the .val and .lbl syntax mentioned above to refer to values and @@ -728,18 +727,17 @@
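A sketch of this syntax in action (the vector, values, and labels are invented for illustration; lbl_relabel() and lbl() are the ipumsr functions described above):

x <- haven::labelled(
  c(10, 10, 11, 20, 30, 99),
  c(Yes = 10, `Yes - Logically Assigned` = 11, No = 20, Maybe = 30, NIU = 99)
)

# lbl() defines the new value-label pair; the right-hand side selects
# the existing pairs to fold into it, with .val referring to each value
lbl_relabel(x, lbl(10, "Yes (any)") ~ .val %in% c(10, 11))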

    Add new labels

    Other resources

• The haven package, which underlies ipumsr’s handling of value labels, provides more details on the labelled class. See vignette("semantics", package = "haven").
• The labelled package provides other methods for manipulating value labels, some of which overlap those provided by ipumsr.
• The questionr package includes functions for exploring labelled variables. In particular, the functions describe, freq, and lookfor all print information about a variable to the console using its value labels (see the short sketch after this list).

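For example, a quick sketch assuming questionr is installed and the cps data from above:

questionr::freq(cps$HEALTH)  # frequency table printed with HEALTH's value labels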
Finally, the foreign and prettyR packages don’t use the labelled class, but provide similar functionality for handling value labels, which could be adapted for use with IPUMS data.

diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css
deleted file mode 100644
index 5a859415..00000000
--- a/docs/bootstrap-toc.css
+++ /dev/null
(60 lines of vendored bootstrap-toc v0.4.1 CSS, omitted here)

diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js
deleted file mode 100644
index 1cdd573b..00000000
--- a/docs/bootstrap-toc.js
+++ /dev/null
(159 lines of vendored bootstrap-toc v0.4.1 JavaScript, omitted here)

diff --git a/docs/docsearch.css b/docs/docsearch.css
deleted file mode 100644
index e5f1fe1d..00000000
--- a/docs/docsearch.css
+++ /dev/null
(148 lines of vendored Algolia DocSearch CSS, omitted here)

diff --git a/docs/docsearch.js b/docs/docsearch.js
deleted file mode 100644
index b35504cd..00000000
--- a/docs/docsearch.js
+++ /dev/null
(85 lines of DocSearch keyboard-shortcut and term-highlighting JavaScript, omitted here)

diff --git a/docs/docsearch.json b/docs/docsearch.json
deleted file mode 100644
index 8ec26afd..00000000
--- a/docs/docsearch.json
+++ /dev/null
(95 lines of DocSearch crawler configuration, omitted here)

diff --git a/docs/index.html b/docs/index.html
index acd73e89..721c4528 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -142,15 +142,15 @@

Installation

What is IPUMS?

IPUMS is the world’s largest publicly available population database, providing census and survey data from around the world integrated across time and space. IPUMS integration and documentation make it easy to study change, conduct comparative research, merge information across data types, and analyze individuals within family and community context. Data and services are available free of charge.

IPUMS consists of multiple projects, or collections, that provide different data products.

• Microdata projects distribute data for individual survey units, like people or households.
• Aggregate data projects distribute summary tables of aggregate statistics for particular geographic units along with corresponding GIS mapping files.

-ipumsr supports different levels of functionality for each IPUMS project, as summarized in the following table:
+ipumsr supports different levels of functionality for each IPUMS project, as summarized in the table below.

@@ -369,19 +369,19 @@

    What is IPUMS?

-ipumsr uses the IPUMS API to submit data requests, download data extracts, and get metadata, so the scope of ipumsr functionality generally corresponds to the available API functionality. As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly.
+ipumsr uses the IPUMS API to submit data requests, download data extracts, and get metadata, so the scope of functionality generally corresponds to that available via the API. As the IPUMS team extends the API to support more functionality for more projects, we aim to extend ipumsr capabilities accordingly.
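A sketch of that request-submit-download loop (the description, sample ID, and variables are invented for illustration; the functions are ipumsr's extract-API interface):

usa_extract <- define_extract_usa(
  description = "Example extract: age and sex, 2017 ACS",
  samples = "us2017a",
  variables = c("AGE", "SEX")
)
submitted <- submit_extract(usa_extract)
ready <- wait_for_extract(submitted)  # polls until processing completes
files <- download_extract(ready)      # downloads the data and DDI codebook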

    Getting started

-If you’re new to IPUMS data, learn more about what’s available through the IPUMS Projects Overview.
-The package vignettes are the best place to learn about what’s available in ipumsr itself:
+If you’re new to IPUMS data, learn more about what’s available through the IPUMS Projects Overview. Then, see vignette("ipums") for an overview of how to obtain IPUMS data.
+The package vignettes are the best place to explore what ipumsr has to offer:

The IPUMS support website also houses many project-specific R-based training exercises. However, note that some of these exercises may not be up to date with ipumsr’s current functionality.

    @@ -397,7 +397,7 @@