Preparation for CRAN v0.1.1 release

* Added `anthopolos()` function to compute the Racial Isolation Index (RI) based on [Anthopolos et al. (2011)](https://www.doi.org/10.1016/j.sste.2011.06.002) for specified counties/tracts 2009-2020 * Added `bravo()` function to compute the Educational Isolation Index (EI) based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384) for specified counties/tracts 2009-2020 * Added `gini()` function to retrieve the Gini Index based on [Gini (1921)](https://www.doi.org/10.2307/2223319) for specified counties/tracts 2009-2020 * `Matrix` and `sf` are now Imports * Updated vignette and README for new features * Fixed typos throughout documentation * Updated Description in DESCRIPTION * Updated 'package.R' with new details and section * Updated CITATION with new citations for the additional metrics
idblr · Aug 14, 2022 · 0a51633 · 0a51633
1 parent 8544ff3
commit 0a51633
Show file tree

Hide file tree

Showing 29 changed files with 1,784 additions and 195 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ndi
 Title: Neighborhood Deprivation Indices
-Version: 0.1.0
-Date: 2022-08-10
+Version: 0.1.1
+Date: 2022-08-14
 Authors@R:
     c(person(given = "Ian D.",
              family = "Buller",
@@ -11,14 +11,24 @@ Authors@R:
       person(given = "NCI",
              role = c("cph", "fnd")))
 Maintainer: Ian D. Buller <[email protected]>
-Description: Compute various neighborhood deprivation indices (NDI), including:
+Description: Computes various metrics of socio-economic deprivation and disparity in
+             the United States. Some metrics are considered "spatial" because they 
+             consider the values of neighboring (i.e., adjacent) census geographies in
+             their computation, while other metrics are "aspatial" because they only 
+             consider the value within each census geography. Two types of aspatial 
+             neighborhood deprivation indices (NDI) are available:
              (1) based on Messer et al. (2006) <doi:10.1007/s11524-006-9094-x>
              and (2) based on Andrews et al. (2020) <doi:10.1080/17445647.2020.1750066> 
              and Slotman et al. (2022) <doi:10.1016/j.dib.2022.108002>
-             who uses variables chosen by Roux and Mair (2010)
+             who use variables chosen by Roux and Mair (2010)
              <doi:10.1111/j.1749-6632.2009.05333.x>. Both are a decomposition
              of multiple demographic characteristics from the U.S. Census Bureau
-             American Community Survey 5-year estimates.
+             American Community Survey 5-year estimates (ACS-5; 2010-2020). Using data
+             from the ACS-5 (2009-2020), the package can also (1) compute the spatial
+             Racial Isolation Index (RI) based on Anthopolos et al. (2011)
+             <doi:10.1016/j.sste.2011.06.002>, (2) compute the spatial Educational Isolation
+             Index (EI) based on Bravo et al. (2021) <doi:10.3390/ijerph18179384>, and
+             (3) retrieve the aspatial Gini Index based on Gini (1921) <doi:10.2307/2223319>.
 License: Apache License (>= 2.0)
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
@@ -28,7 +38,9 @@ Depends:
 Imports: 
     dplyr,
     MASS,
+    Matrix,
     psych,
+    sf,
     stats,
     stringr,
     tidycensus,
@@ -38,7 +50,6 @@ Suggests:
     testthat,
     tigris,
     R.rsp,
-    sf,
     spelling
 VignetteBuilder: R.rsp
 Language: en-US

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,11 +1,18 @@
 # Generated by roxygen2: do not edit by hand
 
+export(anthopolos)
+export(bravo)
+export(gini)
 export(messer)
 export(powell_wiley)
 import(dplyr)
 importFrom(MASS,ginv)
+importFrom(Matrix,sparseMatrix)
 importFrom(psych,alpha)
 importFrom(psych,principal)
+importFrom(sf,st_drop_geometry)
+importFrom(sf,st_geometry)
+importFrom(sf,st_intersects)
 importFrom(stats,complete.cases)
 importFrom(stats,cor)
 importFrom(stats,cov2cor)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,16 @@
 # ndi (development version)
 
+# ndi v0.1.1
+* Added `anthopolos()` function to compute the Racial Isolation Index (RI) based on [Anthopolos et al. (2011)](https://www.doi.org/10.1016/j.sste.2011.06.002) for specified counties/tracts 2009-2020
+* Added `bravo()` function to compute the Educational Isolation Index (EI) based on [Bravo et al. (2021)](https://www.doi.org/10.3390/ijerph18179384) for specified counties/tracts 2009-2020
+* Added `gini()` function to retrieve the Gini Index based on [Gini (1921)](https://www.doi.org/10.2307/2223319) for specified counties/tracts 2009-2020
+* `Matrix` and `sf` are now Imports
+* Updated vignette and README for new features
+* Fixed typos throughout documentation
+* Updated Description in DESCRIPTION
+* Updated 'package.R' with new details and section
+* Updated CITATION with new citations for the additional metrics
+
 # ndi v0.1.0
 * Fixed invalid URL and typos in package README.md
 

diff --git a/R/anthopolos.R b/R/anthopolos.R
@@ -0,0 +1,216 @@
+#' Racial Isolation Index based on Anthopolos et al. (2011) 
+#' 
+#' Compute the Racial Isolation Index (Anthopolos) values for selected subgroup(s).
+#'
+#' @param geo Character string specifying the geography of the data either census tracts \code{geo = "tract"} (the default) or counties \code{geo = "county"}.
+#' @param year Numeric. The year to compute the estimate. The default is 2020 and the years between 2009 and 2020 are currently available.
+#' @param subgroup Character string specifying the racial/ethnic subgroup(s). See Details for available choices.
+#' @param quiet Logical. If TRUE, will suppress messages about potential missing census information. The default is FALSE (messages are displayed).
+#' @param ... Arguments passed to \code{\link[tidycensus]{get_acs}} to select state, county, and other arguments for census characteristics
+#'
+#' @details This function will compute the Racial Isolation Index (RI) of U.S. census tracts or counties for a specified geographical extent (e.g., entire U.S. or a single state) based on Anthopolos et al. (2011) \doi{10.1016/j.sste.2011.06.002} who originally designed the metric for the racial isolation of non-Hispanic Black individuals. This function provides the computation of RI for any of the U.S. Census Bureau race/ethnicity subgroups (including Hispanic and non-Hispanic individuals).
+#' 
+#' The function uses the \code{\link[tidycensus]{get_acs}} function to obtain U.S. Census Bureau 5-year American Community Survey (ACS-5) characteristics used for the geospatial computation. Yearly estimates are available for 2009 through 2020, the years for which ACS-5 data are available; other years may be available from other U.S. Census Bureau surveys. The twenty racial/ethnic subgroups (U.S. Census Bureau definitions) are:
+#' \itemize{
+#'  \item{B03002_002: }{Not Hispanic or Latino "NHoL"}
+#'  \item{B03002_003: }{Not Hispanic or Latino, White alone "NHoLW"}
+#'  \item{B03002_004: }{Not Hispanic or Latino, Black or African American alone "NHoLB"}
+#'  \item{B03002_005: }{Not Hispanic or Latino, American Indian and Alaska Native alone "NHoLAIAN"}
+#'  \item{B03002_006: }{Not Hispanic or Latino, Asian alone "NHoLA"}
+#'  \item{B03002_007: }{Not Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "NHoLNHOPI"}
+#'  \item{B03002_008: }{Not Hispanic or Latino, Some other race alone "NHoLSOR"}
+#'  \item{B03002_009: }{Not Hispanic or Latino, Two or more races "NHoLTOMR"}
+#'  \item{B03002_010: }{Not Hispanic or Latino, Two races including Some other race "NHoLTRiSOR"}
+#'  \item{B03002_011: }{Not Hispanic or Latino, Two races excluding Some other race, and three or more races "NHoLTReSOR"}
+#'  \item{B03002_012: }{Hispanic or Latino "HoL"}
+#'  \item{B03002_013: }{Hispanic or Latino, White alone "HoLW"}
+#'  \item{B03002_014: }{Hispanic or Latino, Black or African American alone "HoLB"}
+#'  \item{B03002_015: }{Hispanic or Latino, American Indian and Alaska Native alone "HoLAIAN"}
+#'  \item{B03002_016: }{Hispanic or Latino, Asian alone "HoLA"}
+#'  \item{B03002_017: }{Hispanic or Latino, Native Hawaiian and Other Pacific Islander alone "HoLNHOPI"}
+#'  \item{B03002_018: }{Hispanic or Latino, Some other race alone "HoLSOR"}
+#'  \item{B03002_019: }{Hispanic or Latino, Two or more races "HoLTOMR"}
+#'  \item{B03002_020: }{Hispanic or Latino, Two races including Some other race "HoLTRiSOR"}
+#'  \item{B03002_021: }{Hispanic or Latino, Two races excluding Some other race, and three or more races "HoLTReSOR"}
+#' }
+#' 
+#' Use the internal \code{state} and \code{county} arguments within the \code{\link[tidycensus]{get_acs}} function to specify the geographic extent of the data output. NOTE: The current version does not correct for edge effects: census geographies along the border of the specified spatial extent, a coastline, or the U.S.-Mexico / U.S.-Canada border may have few neighboring census geographies, and RI values in these census geographies may be unstable. A stop-gap solution for the first source of edge effect (the border of the specified spatial extent) is to compute the RI for neighboring census geographies (i.e., the states bordering a study area of interest) and then use the estimates of the study area of interest.
+#' 
+#' A census geography (and its neighbors) in which nearly all of the population identifies with the specified race/ethnicity subgroup(s) (e.g., non-Hispanic or Latino, Black or African American alone) will have an RI value that is close to 1. In contrast, a census geography (and its neighbors) in which nearly none of the population identifies with the specified race/ethnicity subgroup(s) (e.g., not non-Hispanic or Latino, Black or African American alone) will have an RI value that is close to 0.
+#' 
+#' @return An object of class 'list'. This is a named list with the following components:
+#' 
+#' \describe{
+#' \item{\code{ri}}{An object of class 'tbl' for the GEOID, name, RI, and raw census values of specified census geographies.}
+#' \item{\code{missing}}{An object of class 'tbl' of the count and proportion of missingness for each census variable used to compute the RI.}
+#' }
+#' 
+#' @import dplyr
+#' @importFrom Matrix sparseMatrix
+#' @importFrom sf st_drop_geometry st_geometry st_intersects
+#' @importFrom stringr str_trim
+#' @importFrom tidycensus get_acs
+#' @importFrom tidyr gather separate
+#' @export
+#' 
+#' @seealso \code{\link[tidycensus]{get_acs}} for additional arguments for geographic extent selection (i.e., \code{state} and \code{county}).
+#'
+#' @examples
+#' \dontrun{
+#' # Wrapped in \dontrun{} because these examples require a Census API key.
+#'   
+#'   # Tract-level metric (2020)
+#'   anthopolos(geo = "tract", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB"))
+#'   
+#'   # County-level metric (2020)
+#'   anthopolos(geo = "county", state = "GA", year = 2020, subgroup = c("NHoLB", "HoLB"))
+#'   
+#' }
+#' 
+anthopolos <- function(geo = "tract", year = 2020, subgroup, quiet = FALSE, ...) {
+
+  # Check arguments
+  ## Keep the values returned by match.arg(): it accepts unambiguous partial
+  ## matches (e.g., geo = "tr"), while the code below compares `geo` and indexes
+  ## `vars` by exact name, so the normalized values must be the ones used
+  geo <- match.arg(geo, choices = c("county", "tract"))
+  stopifnot(is.numeric(year), year %in% 2009:2020)
+  subgroup <- match.arg(subgroup, several.ok = TRUE,
+                        choices = c("NHoL", "NHoLW", "NHoLB", "NHoLAIAN", "NHoLA", "NHoLNHOPI",
+                                    "NHoLSOR", "NHoLTOMR", "NHoLTRiSOR", "NHoLTReSOR",
+                                    "HoL", "HoLW", "HoLB", "HoLAIAN", "HoLA", "HoLNHOPI",
+                                    "HoLSOR", "HoLTOMR", "HoLTRiSOR", "HoLTReSOR"))
+
+  # select census variables
+  vars <- c(TotalPop = "B03002_001",
+            NHoL = "B03002_002",
+            NHoLW = "B03002_003",
+            NHoLB = "B03002_004",
+            NHoLAIAN = "B03002_005",
+            NHoLA = "B03002_006",
+            NHoLNHOPI = "B03002_007",
+            NHoLSOR = "B03002_008",
+            NHoLTOMR = "B03002_009",
+            NHoLTRiSOR = "B03002_010",
+            NHoLTReSOR = "B03002_011",
+            HoL = "B03002_012",
+            HoLW = "B03002_013",
+            HoLB = "B03002_014",
+            HoLAIAN = "B03002_015",
+            HoLA = "B03002_016",
+            HoLNHOPI = "B03002_017",
+            HoLSOR = "B03002_018",
+            HoLTOMR = "B03002_019",
+            HoLTRiSOR = "B03002_020",
+            HoLTReSOR = "B03002_021")
+
+  selected_vars <- vars[c("TotalPop", subgroup)]
+  out_names <- names(selected_vars) # save for output
+  ## Request the subgroups under generic names (subgroup1, subgroup2, ...);
+  ## the wide output of get_acs() appends "E" to each name for the estimate
+  names(selected_vars) <- c("TotalPop", paste0("subgroup", seq_along(subgroup)))
+  in_names <- paste0(names(selected_vars), "E")
+
+  # acquire RI variables and sf geometries
+  ri_vars <- suppressMessages(suppressWarnings(tidycensus::get_acs(geography = geo,
+                                                                   year = year, 
+                                                                   output = "wide",
+                                                                   variables = selected_vars, 
+                                                                   geometry = TRUE, ...)))
+
+
+  if (geo == "tract") {
+    ri_vars <- ri_vars %>%
+      tidyr::separate(NAME, into = c("tract", "county", "state"), sep = ",") %>%
+      dplyr::mutate(tract = gsub("[^0-9\\.]","", tract))
+  } else {
+    ri_vars <- ri_vars %>% tidyr::separate(NAME, into = c("county", "state"), sep = ",") 
+  }
+
+  ## `subgroup` is the row-wise total across all selected subgroup estimates
+  ri_vars <- ri_vars %>% 
+    dplyr::mutate(county = stringr::str_trim(county),
+                  subgroup = rowSums(sf::st_drop_geometry(ri_vars[ , in_names[-1]])))
+
+  # Compute RI
+  ## From Anthopolos et al. (2011) https://doi.org/10.1016/j.sste.2011.06.002
+  ## RI_{im} = (Sigma_{j∈∂_{i}} w_{ij} * T_{jm}) / (Sigma_{j∈∂_{i}} w_{ij} * T_{j})
+  ## Where:
+  ## ∂_{i} denotes the set of index units i and its neighbors
+  ## Given M mutually exclusive racial/ethnic subgroups, m indexes the subgroups of M
+  ## T_{i} denotes the total population in region i (TotalPop)
+  ## T_{im} denotes the population of the selected subgroup(s) (subgroup1, ...)
+  ## w_{ij} denotes a nXn first-order adjacency matrix, where n is the number of census geometries in the study area
+  ### and the entries of w_{ij} are set to 1 if a boundary is shared by region i and region j and zero otherwise
+  ### Entries of the main diagonal (since i∈∂_{i}, w_{ij} = w_{ii} when j = i) of w_{ij} are set to 1.5
+  ### such that the weight of the index unit, i, is larger than the weights assigned to adjacent tracts
+
+  ## Geospatial adjacency matrix (wij)
+  ## st_intersects() lists, for each geometry, the indices of the geometries it intersects
+  tmp <- sf::st_intersects(sf::st_geometry(ri_vars), sparse = TRUE)
+  tmpL <- length(tmp)
+  tmpi <- rep(seq_len(tmpL), lengths(tmp)) # row index repeated once per intersecting geometry
+  tmpj <- unlist(tmp)
+  wij <- Matrix::sparseMatrix(i = tmpi, j = tmpj, x = 1, dims = c(tmpL, tmpL))
+  diag(wij) <- 1.5
+
+  ## Compute
+  ri_vars <- sf::st_drop_geometry(ri_vars) # drop geometries (can join back later)
+  subgroup_pop <- ri_vars[["subgroup"]]
+  total_pop <- ri_vars[["TotalPopE"]]
+  ## Each row of wij is expanded to a dense weight vector so that the arithmetic
+  ## (including how NA values propagate through the sums) matches the formula above
+  ri_vars$RI <- vapply(seq_len(nrow(wij)), function(i) {
+    w_i <- as.numeric(wij[i, ])
+    sum(w_i * subgroup_pop) / sum(w_i * total_pop)
+  }, numeric(1))
+
+  # warning for missingness of census characteristics
+  missingYN <- ri_vars %>%
+    dplyr::select(dplyr::all_of(in_names))
+  names(missingYN) <- out_names
+  missingYN <- missingYN %>%
+    tidyr::gather(key = "variable", value = "val") %>%
+    dplyr::mutate(missing = is.na(val)) %>%
+    dplyr::group_by(variable) %>%
+    dplyr::mutate(total = n()) %>%
+    dplyr::group_by(variable, total, missing) %>%
+    dplyr::count() %>%
+    dplyr::mutate(percent = round(n / total * 100,2),
+                  percent = paste0(percent," %")) %>%
+    dplyr::filter(missing == TRUE)
+
+  # message for missing census data (skipped when quiet = TRUE)
+  if (!quiet && nrow(missingYN) != 0) {
+    message("Warning: Missing census data")
+  }
+
+  # format output
+  ## Tract-level output carries an additional "tract" identifier column
+  if (geo == "tract") {
+    id_cols <- c("GEOID", "state", "county", "tract")
+  } else {
+    id_cols <- c("GEOID", "state", "county")
+  }
+  ri <- ri_vars %>%
+    dplyr::select(dplyr::all_of(c(id_cols, "RI", in_names)))
+  names(ri) <- c(id_cols, "RI", out_names) # restore the user-facing subgroup names
+
+  ri <- ri %>%
+    dplyr::mutate(county = stringr::str_trim(county), 
+                  state = stringr::str_trim(state)) %>%
+    dplyr::arrange(GEOID) %>%
+    dplyr::as_tibble() 
+
+  out <- list(ri = ri,
+              missing = missingYN)
+
+  return(out)
+}