Skip to content

Commit

Permalink
remote tests: support for standard arguments #9
Browse files Browse the repository at this point in the history
  • Loading branch information
brownag committed Jan 18, 2021
1 parent 75fa042 commit 2c73c8f
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 44 deletions.
124 changes: 80 additions & 44 deletions R/create_GDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,90 @@
#' @return TRUE if successful
#' @export
create_GDS <- function(...) {
download_GDS()
parse_GDS()

parse_GDS(...)

}

download_GDS <- function() {
download_GDS <- function(outpath = "./inst/extdata",
output_types = "txt",
keep_pdf = FALSE) {

download.file(destfile = "GDS.pdf",
url = "https://www.nrcs.usda.gov/Internet/FSE_DOCUMENTS/nrcs142p2_051068.pdf")
download.file(destfile = "GDS.pdf",
url = "https://www.nrcs.usda.gov/Internet/FSE_DOCUMENTS/nrcs142p2_051068.pdf")

system(sprintf("pdftotext -raw -nodiag GDS.pdf"))
# system(sprintf("pdftohtml GDS.pdf"))

file.remove("GDS.pdf")

dir.create("inst/extdata/GDS", recursive = TRUE)
file.copy("GDS.txt","inst/extdata/GDS/GDS.txt")

# htm <- list.files(pattern = "html")
# file.copy(htm,"inst/extdata/GDS")

# img <- list.files(pattern = "png|jpg")
# file.copy(img,"inst/extdata/GDS")
# file.remove(c("GDS.txt", img, htm))
system(sprintf("pdftotext -raw -nodiag GDS.pdf"))


dir.create(file.path(outpath, "GDS"), recursive = TRUE)

if(file.exists("GDS.txt")) {
file.copy("GDS.txt", file.path(outpath, "GDS/GDS.txt"))
file.remove("GDS.txt")
}

if ("html" %in% output_types) {
system(sprintf("pdftohtml GDS.pdf"))

htm <- list.files(pattern = "html")
file.copy(htm, file.path(outpath, "GDS"))

img <- list.files(pattern = "png|jpg")
file.copy(img, "GDS")

file.remove(c(img, htm))
}

if (!keep_pdf) {
if (file.exists("GDS.pdf"))
file.remove("GDS.pdf")
}
}

parse_GDS <- function() {
x <- readLines('inst/extdata/GDS/GDS.txt', warn = FALSE)

# get GDS abbreviated outline (Phys. Location, Geomor. Description, Surface Morphometry)
gds.outline.bounds <- grep('ABBREVIATED OUTLINE|DETAILED OUTLINE', x)
stopifnot(length(gds.outline.bounds) == 2)

abbreviated.outline <- data.frame(content = x[gds.outline.bounds[1]:(gds.outline.bounds[2] - 4)])
abbreviated.outline$part <- cumsum(grepl("PART I+", abbreviated.outline$content))
abbreviated.outline$tier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$part),
function(x) cumsum(grepl("^[A-Z]\\)", x)))$x)
abbreviated.outline$subtier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$tier),
function(x) cumsum(grepl("^[1-9]\\)", x)))$x)

write(jsonlite::toJSON(abbreviated.outline, pretty = TRUE, auto_unbox = TRUE),
file = "inst/extdata/GDS/GDS_outline_abbrev.json")

# TODO: detailed outline; using structure parsed from abbreviated

# TODO: Physiographic Location
# TODO: Geomorphic Description
# - comprehensive lists: landscape, landform, microfeature, anthroscape, anthropogenic landforms, anthropogenic microfeatures
# - geomorphic environments and other groupings: associations of terms grouped by process or setting
# TODO: Surface Morphometry
# - Several important figures and tables -- pdftohtml?
#' parse_GDS
#'
#' @param outpath A directory path to create "inst/extdata/NSSH" folder structure.
#' @param download_pdf Download official PDF file? default: "ifneeded"; options: TRUE/FALSE
#' @param output_types Options include \code{c("txt","html")} for processed PDF files.
#' @param keep_pdf Keep PDF files after processing TXT?
#'
parse_GDS <- function(outpath = "./inst/extdata",
download_pdf = "ifneeded",
output_types = c("txt"), #, "html"
keep_pdf = FALSE) {

gds_path <- file.path(outpath, "GDS/GDS.txt")

if (!file.exists(gds_path) | as.character(download_pdf)[1] == "TRUE")
if (!as.character(download_pdf)[1] == "FALSE")
download_GDS(outpath, keep_pdf = keep_pdf, output_types = output_types)

if (file.exists(gds_path)) {
x <- readLines(gds_path, warn = FALSE)

# get GDS abbreviated outline (Phys. Location, Geomor. Description, Surface Morphometry)
gds.outline.bounds <- grep('ABBREVIATED OUTLINE|DETAILED OUTLINE', x)
stopifnot(length(gds.outline.bounds) == 2)

abbreviated.outline <- data.frame(content = x[gds.outline.bounds[1]:(gds.outline.bounds[2] - 4)])
abbreviated.outline$part <- cumsum(grepl("PART I+", abbreviated.outline$content))
abbreviated.outline$tier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$part),
function(x) cumsum(grepl("^[A-Z]\\)", x)))$x)
abbreviated.outline$subtier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$tier),
function(x) cumsum(grepl("^[1-9]\\)", x)))$x)

write(jsonlite::toJSON(abbreviated.outline, pretty = TRUE, auto_unbox = TRUE),
file = file.path(outpath, "/GDS/GDS_outline_abbrev.json"))
} else {
message("Skipped GDS download")
}
# TODO: detailed outline; using structure parsed from abbreviated

# TODO: Physiographic Location
# TODO: Geomorphic Description
# - comprehensive lists: landscape, landform, microfeature, anthroscape, anthropogenic landforms, anthropogenic microfeatures
# - geomorphic environments and other groupings: associations of terms grouped by process or setting
# TODO: Surface Morphometry
# - Several important figures and tables -- pdftohtml?

}
25 changes: 25 additions & 0 deletions man/parse_GDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2c73c8f

Please sign in to comment.