From 2c73c8fd0b5d79cb0e8e6e6b5e517eafafa3f4af Mon Sep 17 00:00:00 2001 From: "Andrew G. Brown" Date: Mon, 18 Jan 2021 11:38:35 -0800 Subject: [PATCH] remote tests: support for standard arguments #9 --- R/create_GDS.R | 124 ++++++++++++++++++++++++++++++----------------- man/parse_GDS.Rd | 25 ++++++++++ 2 files changed, 105 insertions(+), 44 deletions(-) create mode 100644 man/parse_GDS.Rd diff --git a/R/create_GDS.R b/R/create_GDS.R index 7fcbe65cfd..2d48498b0b 100644 --- a/R/create_GDS.R +++ b/R/create_GDS.R @@ -4,54 +4,90 @@ #' @return TRUE if successful #' @export create_GDS <- function(...) { - download_GDS() - parse_GDS() + + parse_GDS(...) + } -download_GDS <- function() { +download_GDS <- function(outpath = "./inst/extdata", + output_types = "txt", + keep_pdf = FALSE) { - download.file(destfile = "GDS.pdf", - url = "https://www.nrcs.usda.gov/Internet/FSE_DOCUMENTS/nrcs142p2_051068.pdf") + download.file(destfile = "GDS.pdf", + url = "https://www.nrcs.usda.gov/Internet/FSE_DOCUMENTS/nrcs142p2_051068.pdf") - system(sprintf("pdftotext -raw -nodiag GDS.pdf")) - # system(sprintf("pdftohtml GDS.pdf")) - - file.remove("GDS.pdf") - - dir.create("inst/extdata/GDS", recursive = TRUE) - file.copy("GDS.txt","inst/extdata/GDS/GDS.txt") - - # htm <- list.files(pattern = "html") - # file.copy(htm,"inst/extdata/GDS") - - # img <- list.files(pattern = "png|jpg") - # file.copy(img,"inst/extdata/GDS") - # file.remove(c("GDS.txt", img, htm)) + system(sprintf("pdftotext -raw -nodiag GDS.pdf")) + + + dir.create(file.path(outpath, "GDS"), recursive = TRUE) + + if(file.exists("GDS.txt")) { + file.copy("GDS.txt", file.path(outpath, "GDS/GDS.txt")) + file.remove("GDS.txt") + } + + if ("html" %in% output_types) { + system(sprintf("pdftohtml GDS.pdf")) + + htm <- list.files(pattern = "html") + file.copy(htm, file.path(outpath, "GDS")) + + img <- list.files(pattern = "png|jpg") + file.copy(img, "GDS") + + file.remove(c(img, htm)) + } + + if (!keep_pdf) { + if (file.exists("GDS.pdf")) + file.remove("GDS.pdf") + } } -parse_GDS <- function() { - x <- readLines('inst/extdata/GDS/GDS.txt', warn = FALSE) - - # get GDS abbreviated outline (Phys. Location, Geomor. Description, Surface Morphometry) - gds.outline.bounds <- grep('ABBREVIATED OUTLINE|DETAILED OUTLINE', x) - stopifnot(length(gds.outline.bounds) == 2) - - abbreviated.outline <- data.frame(content = x[gds.outline.bounds[1]:(gds.outline.bounds[2] - 4)]) - abbreviated.outline$part <- cumsum(grepl("PART I+", abbreviated.outline$content)) - abbreviated.outline$tier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$part), - function(x) cumsum(grepl("^[A-Z]\\)", x)))$x) - abbreviated.outline$subtier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$tier), - function(x) cumsum(grepl("^[1-9]\\)", x)))$x) - - write(jsonlite::toJSON(abbreviated.outline, pretty = TRUE, auto_unbox = TRUE), - file = "inst/extdata/GDS/GDS_outline_abbrev.json") - - # TODO: detailed outline; using structure parsed from abbreviated - - # TODO: Physiographic Location - # TODO: Geomorphic Description - # - comprehensive lists: landscape, landform, microfeature, anthroscape, anthropogenic landforms, anthropogenic microfeatures - # - geomorphic environments and other groupings: associations of terms grouped by process or setting - # TODO: Surface Morphometry - # - Several important figures and tables -- pdftohtml? +#' parse_GDS +#' +#' @param outpath A directory path to create "inst/extdata/NSSH" folder structure. +#' @param download_pdf Download official PDF file? default: "ifneeded"; options: TRUE/FALSE +#' @param output_types Options include \code{c("txt","html")} for processed PDF files. +#' @param keep_pdf Keep PDF files after processing TXT? +#' +parse_GDS <- function(outpath = "./inst/extdata", + download_pdf = "ifneeded", + output_types = c("txt"), #, "html" + keep_pdf = FALSE) { + + gds_path <- file.path(outpath, "GDS/GDS.txt") + + if (!file.exists(gds_path) | as.character(download_pdf)[1] == "TRUE") + if (!as.character(download_pdf)[1] == "FALSE") + download_GDS(outpath, keep_pdf = keep_pdf, output_types = output_types) + + if (file.exists(gds_path)) { + x <- readLines(gds_path, warn = FALSE) + + # get GDS abbreviated outline (Phys. Location, Geomor. Description, Surface Morphometry) + gds.outline.bounds <- grep('ABBREVIATED OUTLINE|DETAILED OUTLINE', x) + stopifnot(length(gds.outline.bounds) == 2) + + abbreviated.outline <- data.frame(content = x[gds.outline.bounds[1]:(gds.outline.bounds[2] - 4)]) + abbreviated.outline$part <- cumsum(grepl("PART I+", abbreviated.outline$content)) + abbreviated.outline$tier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$part), + function(x) cumsum(grepl("^[A-Z]\\)", x)))$x) + abbreviated.outline$subtier <- do.call('c', aggregate(abbreviated.outline$content, by = list(abbreviated.outline$tier), + function(x) cumsum(grepl("^[1-9]\\)", x)))$x) + + write(jsonlite::toJSON(abbreviated.outline, pretty = TRUE, auto_unbox = TRUE), + file = file.path(outpath, "/GDS/GDS_outline_abbrev.json")) + } else { + message("Skipped GDS download") + } + # TODO: detailed outline; using structure parsed from abbreviated + + # TODO: Physiographic Location + # TODO: Geomorphic Description + # - comprehensive lists: landscape, landform, microfeature, anthroscape, anthropogenic landforms, anthropogenic microfeatures + # - geomorphic environments and other groupings: associations of terms grouped by process or setting + # TODO: Surface Morphometry + # - Several important figures and tables -- pdftohtml? + } diff --git a/man/parse_GDS.Rd b/man/parse_GDS.Rd new file mode 100644 index 0000000000..bdcb95dc14 --- /dev/null +++ b/man/parse_GDS.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_GDS.R +\name{parse_GDS} +\alias{parse_GDS} +\title{parse_GDS} +\usage{ +parse_GDS( + outpath = "./inst/extdata", + download_pdf = "ifneeded", + output_types = c("txt"), + keep_pdf = FALSE +) +} +\arguments{ +\item{outpath}{A directory path to create "inst/extdata/NSSH" folder structure.} + +\item{download_pdf}{Download official PDF file? default: "ifneeded"; options: TRUE/FALSE} + +\item{output_types}{Options include \code{c("txt","html")} for processed PDF files.} + +\item{keep_pdf}{Keep PDF files after processing TXT?} +} +\description{ +parse_GDS +}