diff --git a/.Rbuildignore b/.Rbuildignore index 5decb7c8..e7b0d76d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,5 +1,5 @@ .Rhistory -vignettes/figure +vignettes/figures appveyor.yml .travis.yml .gitignore diff --git a/DESCRIPTION b/DESCRIPTION index 37105f77..8990c3b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,11 +39,11 @@ Imports: readr (>= 1.0.0), jsonlite Suggests: - xtable, + htmlTable, knitr, testthat -VignetteBuilder: knitr BuildVignettes: true +VignetteBuilder: knitr BugReports: https://github.com/USGS-R/dataRetrieval/issues URL: https://pubs.usgs.gov/tm/04/a10/ RoxygenNote: 6.0.1 diff --git a/NEWS b/NEWS index cd848f4d..d8fbf35b 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +dataRetrieval 2.7.1 +========== +* Converted vignette to html + dataRetrieval 2.7.0 ========== * Added National Groundwater Monitoring Network services diff --git a/R/whatNWISData.r b/R/whatNWISData.r index caa3fecb..6782f19a 100644 --- a/R/whatNWISData.r +++ b/R/whatNWISData.r @@ -69,8 +69,12 @@ whatNWISdata <- function(siteNumbers,service="all",parameterCd="all",statCd="all siteNumber <- paste(siteNumbers,collapse=",") if(!("all" %in% service)){ - service <- match.arg(service, c("dv","uv","qw","ad","id","pk","sv","gw","aw","all","ad","iv","rt"), several.ok = TRUE) - } + service <- match.arg(service, c("dv","uv","qw","ad","id","pk","sv","gw","aw","all","ad","iv"), several.ok = TRUE) + + if(service == "uv"){ + service <- "iv" + } + } if(!("all" %in% parameterCd)){ if(anyNA(parameterCd)){ diff --git a/inst/doc/dataRetrieval.R b/inst/doc/dataRetrieval.R index c915b2b5..b4abb4d6 100644 --- a/inst/doc/dataRetrieval.R +++ b/inst/doc/dataRetrieval.R @@ -1,24 +1,22 @@ -## ----openLibrary, echo=FALSE------------------------------ -library(xtable) -options(continue=" ") -options(width=60) +## ----setup, include=FALSE, message=FALSE------------------ library(knitr) +library(dataRetrieval) - -## ----include=TRUE ,echo=FALSE,eval=TRUE------------------- -opts_chunk$set(highlight=TRUE, tidy=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE,comment="") +options(continue=" ") +options(width=60) +knitr::opts_chunk$set(echo = TRUE, + warning = FALSE, + message = FALSE, + fig.height = 7, + fig.width = 7) + +opts_chunk$set(highlight=TRUE, + tidy=TRUE, + keep.space=TRUE, + keep.blank.space=FALSE, + keep.comment=TRUE) knit_hooks$set(inline = function(x) { if (is.numeric(x)) round(x, 3)}) -knit_hooks$set(crop = hook_pdfcrop) - -bold.colHeaders <- function(x) { - x <- gsub("\\^(\\d)","$\\^\\1$",x) - x <- gsub("\\%","\\\\%",x) - x <- gsub("\\_"," ",x) - returnX <- paste("\\multicolumn{1}{c}{\\textbf{\\textsf{", x, "}}}", sep = "") -} -addSpace <- function(x) ifelse(x != "1", "[5pt]","") -library(dataRetrieval) ## ----workflow, echo=TRUE,eval=FALSE----------------------- # library(dataRetrieval) @@ -39,38 +37,98 @@ library(dataRetrieval) # pCode <- readNWISpCode(parameterCd) # -## ----tableParameterCodes, echo=FALSE,results='asis'------- +## ----echo=FALSE------------------------------------------- + +library(htmlTable) + +Functions <- c("readNWISdata","", + "readNWISdv","","","","", + "readNWISqw","","","","", + "readNWISuv","","","", + "readNWISrating","", + "readNWISmeas","","", + "readNWISpeak","","", + "readNWISgwl","","", + "readNWISuse","","","", + "readNWISstat","","","","","", + "readNWISpCode", + "readNWISsite", + "whatNWISsites", + "whatNWISdata","", + "readWQPdata", + "readWQPqw","","","", + "whatWQPsites") +Arguments <- c("...","service", #readNWISdata + "siteNumber","parameterCd","startDate","endDate","statCd", #readNWISdv + "siteNumber","parameterCd","startDate","endDate","expanded", #readNWISqw + "siteNumber","parameterCd","startDate","endDate", #readNWISuv + "siteNumber","type", #readNWISrating + "siteNumber","startDate","endDate", #readNWISmeas + "siteNumber","startDate","endDate", #readNWISpeak + "siteNumber","startDate","endDate", #readNWISgwl + "stateCd","countyCd","years","categories", #readNWISuse + "siteNumbers","parameterCd","startDate","endDate","statReportType","statType", #readNWISstat + "parameterCd", #readNWISpCode + "siteNumber", #readNWISsite + "...", #whatNWISsites + "siteNumber","service", #whatNWISdata + "...", #readWQPdata + "siteNumber","parameterCd","startDate","endDate", #readWQPqw + "...") #whatWQPsites +Description <- c("NWIS data using user-specified queries","", #readNWISdata + "NWIS daily data","","","","", #readNWISdv + "NWIS water quality data","","","","", #readNWISqw + "NWIS instantaneous value data","","","", #readNWISuv + "NWIS rating table for active streamgage","", #readNWISrating + "NWIS surface-water measurements","","", #readNWISmeas + "NWIS peak flow data","","", #readNWISpeak + "NWIS groundwater level measurements","","", #readNWISgwl + "NWIS water use","","","", #readNWISuse + "NWIS statistical service","","","","","", #readNWISstat + "NWIS parameter code information", #readNWISpCode + "NWIS site information", #readNWISsite + "NWIS site search using user-specified queries", + "NWIS data availability, including period of record and count","", + "WQP data using user-specified queries", + "WQP data","","","", + "WQP site search using user-specified queries") + +data.df <- data.frame(`Function Name` = Functions, Arguments, Description, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 1: dataRetrieval functions", + rnames=FALSE, align=c("l","l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + +## ----tableParameterCodes, echo=FALSE---------------------- + + pCode <- c('00060', '00065', '00010','00045','00400') -shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") +shortName <- c("Discharge [ft3/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) -print(xtable(data.df, - label="tab:params", - caption="Common USGS Parameter Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.text.function = function(x) {x}, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) +htmlTable(data.df, + caption="Table 2: Common USGS Parameter Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") -## ----tableStatCodes, echo=FALSE,results='asis'------------ + +## ----tableStatCodes, echo=FALSE--------------------------- StatCode <- c('00001', '00002', '00003','00008') shortName <- c("Maximum","Minimum","Mean", "Median") data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) -print(xtable(data.df,label="tab:stat", - caption="Commonly used USGS Stat Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) +htmlTable(data.df, + caption="Table 3: Commonly used USGS Stat Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + ## ----getSite, echo=TRUE, eval=FALSE----------------------- @@ -89,33 +147,28 @@ print(xtable(data.df,label="tab:stat", # # -## ----tablegda, echo=FALSE,eval=FALSE---------------------- -# tableData <- with(dailyDataAvailable, -# data.frame( -# siteNumber= site_no, -# srsname=srsname, -# startDate=as.character(begin_date), -# endDate=as.character(end_date), -# count=as.character(count_nu), -# units=parameter_units, -# # statCd = stat_cd, -# stringsAsFactors=FALSE) -# ) -# -# tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s" -# tableData$units[which(tableData$units == "uS/cm @25C")] <- "$\\mu$S/cm @25C" -# -# -# print(xtable(tableData,label="tab:gda", -# caption="Reformatted version of output from \\texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]"), -# caption.placement="top", -# size = "\\footnotesize", -# latex.environment=NULL, -# sanitize.text.function = function(x) {x}, -# sanitize.colnames.function = bold.colHeaders, -# sanitize.rownames.function = addSpace -# ) -# +## ----echo=FALSE------------------------------------------- + +tableData <- data.frame( + siteNumber = c("01491000","01491000","01645000","01491000","01491000","01491000"), + srsname = c("Temperature, water","Stream flow, mean daily", + "Stream flow, mean daily", + "Specific conductance", + "Suspended sediment concentration (SSC)", + "Suspended sediment discharge" ), + startDate = c("2010-10-01","1948-01-01","1930-09-26","2010-10-01","1980-10-01","1980-10-01"), + endDate = c("2012-05-09","2017-05-17","2017-05-17","2012-05-09","1991-09-30","1991-09-30"), + count = c("529","25340","31646","527","4017","4017"), + units = c("deg C","ft3/s","ft3/s","uS/cm @25C","mg/l","tons/day"), + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]", + rnames=FALSE, + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + ## ----label=getPCodeInfo, echo=TRUE, eval=FALSE------------ # # Using defaults: @@ -167,7 +220,7 @@ variableInfo <- attr(temperatureAndFlow, "variableInfo") siteInfo <- attr(temperatureAndFlow, "siteInfo") -## ----getNWIStemperaturePlot, echo=TRUE, fig.cap="Temperature and discharge plot of Choptank River in 2012.",out.width='1\\linewidth',out.height='1\\linewidth',fig.show='hold'---- +## --------------------------------------------------------- variableInfo <- attr(temperatureAndFlow, "variableInfo") siteInfo <- attr(temperatureAndFlow, "siteInfo") @@ -205,8 +258,8 @@ legend("topleft", variableInfo$param_units, # startDate, endDate) # # # Or the wide return: -# # dfWide <- readNWISqw(siteNumber, parameterCd, -# # startDate, endDate, reshape=TRUE) +# dfWide <- readNWISqw(siteNumber, parameterCd, +# startDate, endDate, reshape=TRUE) # ## ----qwmeta, echo=TRUE, eval=FALSE------------------------ @@ -232,15 +285,57 @@ legend("topleft", variableInfo$param_units, # surfaceData <- readNWISmeas(siteNumber) # +## ----eval=FALSE------------------------------------------- +# allegheny <- readNWISuse(stateCd = "Pennsylvania", +# countyCd = "Allegheny") +# +# +# national <- readNWISuse(stateCd = NULL, +# countyCd = NULL, +# transform = TRUE) +# + +## ----eval=FALSE------------------------------------------- +# discharge_stats <- readNWISstat(siteNumbers=c("02319394"), +# parameterCd=c("00060"), +# statReportType="annual") +# + ## ----label=getQWData, echo=TRUE, eval=FALSE--------------- # specificCond <- readWQPqw('WIDNR_WQX-10032762', -# 'Specific conductance','2011-05-01','2011-09-30') +# 'Specific conductance', +# '2011-05-01','2011-09-30') ## ----siteSearch, eval=FALSE------------------------------- # sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), # parameterCd=c("00010","00060"), # hasDataTypeCd="dv") +## ----echo=FALSE------------------------------------------- + +Service <- c("dv","iv","gwlevels","qwdata","measurements","peak","stat") +Description <- c("Daily","Instantaneous","Groundwater Levels","Water Quality","Surface Water Measurements","Peak Flow","Statistics Service") +URL <- c("https://waterservices.usgs.gov/rest/DV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/IV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html", + "https://nwis.waterdata.usgs.gov/nwis/qwdata", + "https://waterdata.usgs.gov/nwis/measurements/", + "https://nwis.waterdata.usgs.gov/usa/nwis/peak/", + "https://waterservices.usgs.gov/rest/Statistics-Service-Test-Tool.html") + +tableData <- data.frame(Service, + Description, + URL, + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 5: NWIS general data calls", + rnames=FALSE, align=c("l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + ## ----dataExample, eval=FALSE------------------------------ # dischargeWI <- readNWISdata(service="dv", # stateCd="WI", @@ -258,10 +353,19 @@ legend("topleft", variableInfo$param_units, # ## ----phData, eval=FALSE----------------------------------- -# # dataPH <- readWQPdata(statecode="US:55", # characteristicName="pH") -# + +## ----eval=FALSE------------------------------------------- +# type <- "Stream" +# sites <- whatWQPdata(countycode="US:55:025",siteType=type) + +## ----eval=FALSE------------------------------------------- +# site <- whatWQPsamples(siteid="USGS-01594440") + +## ----eval=FALSE------------------------------------------- +# type <- "Stream" +# sites <- whatWQPmetrics(countycode="US:55:025",siteType=type) ## ----meta1, eval=FALSE------------------------------------ # @@ -289,10 +393,7 @@ legend("topleft", variableInfo$param_units, # comment(peakData) # # #Which is equivalent to: -# # attr(peakData, "comment") - -## ----helpFunc,eval = FALSE-------------------------------- -# ?readNWISpCode +# attr(peakData, "comment") ## ----seeVignette,eval = FALSE----------------------------- # vignette(dataRetrieval) diff --git a/inst/doc/dataRetrieval.Rmd b/inst/doc/dataRetrieval.Rmd new file mode 100644 index 00000000..1085d715 --- /dev/null +++ b/inst/doc/dataRetrieval.Rmd @@ -0,0 +1,762 @@ +--- +title: "Introduction to the dataRetrieval package" +date: "`r format(Sys.time(), '%d %B, %Y')`" +author: Laura A. DeCicco and Robert M. Hirsch +output: + rmarkdown::html_vignette: + toc: true + number_sections: true +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{Introduction to the dataRetrieval package} + \usepackage[utf8]{inputenc} +--- + + +```{r setup, include=FALSE, message=FALSE} +library(knitr) +library(dataRetrieval) + +options(continue=" ") +options(width=60) +knitr::opts_chunk$set(echo = TRUE, + warning = FALSE, + message = FALSE, + fig.height = 7, + fig.width = 7) + +opts_chunk$set(highlight=TRUE, + tidy=TRUE, + keep.space=TRUE, + keep.blank.space=FALSE, + keep.comment=TRUE) +knit_hooks$set(inline = function(x) { + if (is.numeric(x)) round(x, 3)}) +``` + +The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). + +For information on getting started in R and installing the package, see [Getting Started](#getting-started-in-r). Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. + +A quick workflow for USGS `dataRetrieval` functions: + +```{r workflow, echo=TRUE,eval=FALSE} +library(dataRetrieval) +# Choptank River near Greensboro, MD +siteNumber <- "01491000" +ChoptankInfo <- readNWISsite(siteNumber) +parameterCd <- "00060" + +#Raw daily data: +rawDailyData <- readNWISdv(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +# Sample data Nitrate: +parameterCd <- "00618" +qwData <- readNWISqw(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +pCode <- readNWISpCode(parameterCd) + +``` + +USGS data are made available through the National Water Information System (NWIS). + +Table 1 describes the functions available in the `dataRetrieval` package. + +```{r echo=FALSE} + +library(htmlTable) + +Functions <- c("readNWISdata","", + "readNWISdv","","","","", + "readNWISqw","","","","", + "readNWISuv","","","", + "readNWISrating","", + "readNWISmeas","","", + "readNWISpeak","","", + "readNWISgwl","","", + "readNWISuse","","","", + "readNWISstat","","","","","", + "readNWISpCode", + "readNWISsite", + "whatNWISsites", + "whatNWISdata","", + "readWQPdata", + "readWQPqw","","","", + "whatWQPsites") +Arguments <- c("...","service", #readNWISdata + "siteNumber","parameterCd","startDate","endDate","statCd", #readNWISdv + "siteNumber","parameterCd","startDate","endDate","expanded", #readNWISqw + "siteNumber","parameterCd","startDate","endDate", #readNWISuv + "siteNumber","type", #readNWISrating + "siteNumber","startDate","endDate", #readNWISmeas + "siteNumber","startDate","endDate", #readNWISpeak + "siteNumber","startDate","endDate", #readNWISgwl + "stateCd","countyCd","years","categories", #readNWISuse + "siteNumbers","parameterCd","startDate","endDate","statReportType","statType", #readNWISstat + "parameterCd", #readNWISpCode + "siteNumber", #readNWISsite + "...", #whatNWISsites + "siteNumber","service", #whatNWISdata + "...", #readWQPdata + "siteNumber","parameterCd","startDate","endDate", #readWQPqw + "...") #whatWQPsites +Description <- c("NWIS data using user-specified queries","", #readNWISdata + "NWIS daily data","","","","", #readNWISdv + "NWIS water quality data","","","","", #readNWISqw + "NWIS instantaneous value data","","","", #readNWISuv + "NWIS rating table for active streamgage","", #readNWISrating + "NWIS surface-water measurements","","", #readNWISmeas + "NWIS peak flow data","","", #readNWISpeak + "NWIS groundwater level measurements","","", #readNWISgwl + "NWIS water use","","","", #readNWISuse + "NWIS statistical service","","","","","", #readNWISstat + "NWIS parameter code information", #readNWISpCode + "NWIS site information", #readNWISsite + "NWIS site search using user-specified queries", + "NWIS data availability, including period of record and count","", + "WQP data using user-specified queries", + "WQP data","","","", + "WQP site search using user-specified queries") + +data.df <- data.frame(`Function Name` = Functions, Arguments, Description, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 1: dataRetrieval functions", + rnames=FALSE, align=c("l","l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + + +# USGS Web Retrievals + +In this section, examples of National Water Information System (NWIS) retrievals show how to get raw data into R. This data includes [site information](#site-information), measured [parameter information](#parameter-information), historical [daily values](#daily-data), [unit values](#unit-data) (which include real-time data but can also include other sensor data stored at regular time intervals), [water quality data](#water-quality-data), [groundwater level data](#groundwater-level-data), [peak flow data](#peak-flow-data), [rating curve data](#rating-curve-data), [surface-water measurement data](#surface-water-measurement-data), [water use data](#water-use-data), and [statistics data](#statistics-data). The section [Embedded Metadata](#embedded-metadata) shows instructions for getting metadata that is attached to each returned data frame. + +The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the `dataRetrieval` package as `siteNumber`). Often (but not always), these ID's are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this `siteNumber`. There are many ways to do this, one is the [National Water Information System: Mapper](https://maps.waterdata.usgs.gov/mapper/index.html). + +Once the `siteNumber` is known, the next required input for USGS data retrievals is the "parameter code". This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents "Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen", with units of "mg/l as N". + +Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table 2. + +```{r tableParameterCodes, echo=FALSE} + + +pCode <- c('00060', '00065', '00010','00045','00400') +shortName <- c("Discharge [ft3/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") + +data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 2: Common USGS Parameter Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + +``` + +Two output columns that may not be obvious are "srsname" and "casrn". Srsname stands for "Substance Registry Services". More information on the srs name can be found [here](http://ofmpub.epa.gov/sor_internet/registry/substreg/home/overview/home.do). + +Casrn stands for "Chemical Abstracts Service (CAS) Registry Number". More information on CAS can be found [here](http://www.cas.org/content/chemical-substances/faqs). + +For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and `siteNumber` is enough to make a request for data. For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code. + +Some common codes are shown in Table 3. + +```{r tableStatCodes, echo=FALSE} +StatCode <- c('00001', '00002', '00003','00008') +shortName <- c("Maximum","Minimum","Mean", "Median") + +data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 3: Commonly used USGS Stat Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + + +``` + +Examples for using these site numbers, parameter codes, and statistic codes will be presented in subsequent sections. + +There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as "Ice". Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns), unless the user sets an argument `convertType` to `FALSE`. In that case, the data is returned as a data frame that is entirely character columns. + +## Site Information + + +### readNWISsite + +Use the `readNWISsite` function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. `readNWISsite` can also access information about multiple sites with a vector input. + + +```{r getSite, echo=TRUE, eval=FALSE} +siteNumbers <- c("01491000","01645000") +siteINFO <- readNWISsite(siteNumbers) +``` + +Site information is obtained from: +[https://waterservices.usgs.gov/rest/Site-Test-Tool.html](https://waterservices.usgs.gov/rest/Site-Test-Tool.html) + +Information on the returned data can be found with the `comment` function as described in the [Metadata](#embedded-metadata) section. + +```{r siteNames3, echo=TRUE, eval=FALSE} +comment(siteINFO) +``` + + +### whatNWISdata + +To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the `whatNWISdata` function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: "dv" (daily values), "uv", or "iv" (unit values), "qw" (water-quality), "sv" (sites visits), "pk" (peak measurements), "gw" (groundwater levels), "ad" (sites included in USGS Annual Water Data Reports External Link), "aw" (sites monitored by the USGS Active Groundwater Level Network External Link), and "id" (historical instantaneous values). + +In the following example, we limit the retrieved data to only daily data. The default for "service" is `all`, which returns all of the available data for that site. Likewise, there are arguments for parameter code (`parameterCd`) and statistic code (`statCd`) to filter the results. The default for both is to return all possible values (`all`). The returned `count_nu` for "uv" data is the count of days with returned data, not the actual count of returned values. + + +```{r getSiteExtended, echo=TRUE, eval=FALSE} +# Continuing from the previous example: +# This pulls out just the daily, mean data: + +dailyDataAvailable <- whatNWISdata(siteNumbers, + service="dv", statCd="00003") + + +``` + +```{r echo=FALSE} + +tableData <- data.frame( + siteNumber = c("01491000","01491000","01645000","01491000","01491000","01491000"), + srsname = c("Temperature, water","Stream flow, mean daily", + "Stream flow, mean daily", + "Specific conductance", + "Suspended sediment concentration (SSC)", + "Suspended sediment discharge" ), + startDate = c("2010-10-01","1948-01-01","1930-09-26","2010-10-01","1980-10-01","1980-10-01"), + endDate = c("2012-05-09","2017-05-17","2017-05-17","2012-05-09","1991-09-30","1991-09-30"), + count = c("529","25340","31646","527","4017","4017"), + units = c("deg C","ft3/s","ft3/s","uS/cm @25C","mg/l","tons/day"), + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]", + rnames=FALSE, + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + +See [Creating Tables](#creating-tables-in-microsoft-software-from-r) for instructions on converting an R data frame to a table in Microsoft® software Excel or Word to display a data availability table similar to Table 4. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries. + +## Parameter Information + +To obtain all of the available information concerning a measured parameter (or multiple parameters), use the `readNWISpCode` function: + +```{r label=getPCodeInfo, echo=TRUE, eval=FALSE} +# Using defaults: +parameterCd <- "00618" +parameterINFO <- readNWISpCode(parameterCd) +``` + + +## Daily Data + +To obtain daily records of USGS data, use the `readNWISdv` function. The arguments for this function are `siteNumber`, `parameterCd`, `startDate`, `endDate`, and `statCd` (defaults to "00003"). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from [https://waterservices.usgs.gov/rest/DV-Test-Tool.html](https://waterservices.usgs.gov/rest/DV-Test-Tool.html). + +The dates (start and end) must be in the format "YYYY-MM-DD" (note: the user must include the quotes). Setting the start date to "" (no space) will prompt the program to ask for the earliest date, and setting the end date to "" (no space) will prompt for the latest available date. + +```{r label=getNWISDaily, echo=TRUE, eval=FALSE} + +# Choptank River near Greensboro, MD: +siteNumber <- "01491000" +parameterCd <- "00060" # Discharge +startDate <- "2009-10-01" +endDate <- "2012-09-30" + +discharge <- readNWISdv(siteNumber, + parameterCd, startDate, endDate) +``` + +The column "datetime" in the returned data frame is automatically imported as a variable of class "Date" in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often "A" (approved for publication) or "P" (provisional data subject to revision). + +Another example would be a request for mean and maximum daily temperature and discharge in early 2012: + +```{r label=getNWIStemperature, echo=TRUE, eval=FALSE} +siteNumber <- "01491000" +parameterCd <- c("00010","00060") # Temperature and discharge +statCd <- c("00001","00003") # Mean and maximum +startDate <- "2012-01-01" +endDate <- "2012-05-01" + +temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, + startDate, endDate, statCd=statCd) + +``` + +```{r label=getNWIStemperature2, echo=FALSE, eval=TRUE} +filePath <- system.file("extdata", package="dataRetrieval") +fileName <- "temperatureAndFlow.RData" +fullPath <- file.path(filePath, fileName) +load(fullPath) + +``` + +The column names can be shortened and simplified using the `renameNWISColumns` function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discussed further in the [metadata](#embedded-metadata) section. + + +```{r label=renameColumns, echo=TRUE} +names(temperatureAndFlow) + +temperatureAndFlow <- renameNWISColumns(temperatureAndFlow) +names(temperatureAndFlow) + +``` + +```{r label=attr1, echo=TRUE} +#Information about the data frame attributes: +names(attributes(temperatureAndFlow)) + +statInfo <- attr(temperatureAndFlow, "statisticInfo") +variableInfo <- attr(temperatureAndFlow, "variableInfo") +siteInfo <- attr(temperatureAndFlow, "siteInfo") + +``` + + + +An example of plotting the above data: + +```{r} +variableInfo <- attr(temperatureAndFlow, "variableInfo") +siteInfo <- attr(temperatureAndFlow, "siteInfo") + +par(mar=c(5,5,5,5)) #sets the size of the plot window + +plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max, + ylab=variableInfo$parameter_desc[1],xlab="" ) +par(new=TRUE) +plot(temperatureAndFlow$Date, temperatureAndFlow$Flow, + col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE + ) +axis(4,col="red",col.axis="red") +mtext(variableInfo$parameter_desc[2],side=4,line=3,col="red") +title(paste(siteInfo$station_nm,"2012")) +legend("topleft", variableInfo$param_units, + col=c("black","red"),lty=c(NA,1),pch=c(1,NA)) +``` + + +## Unit Data + +Any data collected at regular time intervals (such as 15-minute or hourly) are known as "unit values". Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function `readNWISuv`. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. + +```{r label=readNWISuv, eval=FALSE} + +parameterCd <- "00060" # Discharge +startDate <- "2012-05-12" +endDate <- "2012-05-13" +dischargeUnit <- readNWISuv(siteNumber, parameterCd, + startDate, endDate) +dischargeUnit <- renameNWISColumns(dischargeUnit) +``` + +The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the `startDate` through the `endDate` (starting and ending with midnight locally-collected time). The dateTime column is converted to UTC (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm). + +To override the UTC timezone, specify a valid timezone in the tz argument. Default is "", which will keep the dateTime column in UTC. Other valid timezones are: + +``` +America/New_York +America/Chicago +America/Denver +America/Los_Angeles +America/Anchorage +America/Honolulu +America/Jamaica +America/Managua +America/Phoenix +America/Metlakatla +``` + +Data are retrieved from [https://waterservices.usgs.gov/rest/IV-Test-Tool.html](https://waterservices.usgs.gov/rest/IV-Test-Tool.html). There are occasions where NWIS values are not reported as numbers, instead a common example is "Ice". Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in [metadata](#embedded-metadata) section. + + +## Water Quality Data + +To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function `readNWISqw`, with the input arguments: `siteNumber`, `parameterCd`, `startDate`, and `endDate`. Additionally, the argument `expanded` is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (`expanded=FALSE`), or a more complete and verbose output (`expanded=TRUE`). `expanded = TRUE` includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There also includes an argument "reshape", that converts the expanded dataset to a "wide" format (each requested parameter code gets individual columns). The defaults are `expanded=TRUE`, and `reshape=FALSE`. + +```{r label=getQW, echo=TRUE, eval=FALSE} + +# Dissolved Nitrate parameter codes: +parameterCd <- c("00618","71851") +startDate <- "1985-10-01" +endDate <- "2012-09-30" + +dfLong <- readNWISqw(siteNumber, parameterCd, + startDate, endDate) + +# Or the wide return: +dfWide <- readNWISqw(siteNumber, parameterCd, + startDate, endDate, reshape=TRUE) + +``` + +Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in the [metadata](#embedded-metadata) section. Additional metadata, such as information about the column names can be found by using the `comment` function, also described in the [metadata](#embedded-metadata) section. + +```{r qwmeta, echo=TRUE, eval=FALSE} + +comment(dfLong) + +``` + + +## Groundwater Level Data + +Groundwater level measurements can be obtained with the `readNWISgwl` function. Information on the returned data can be found with the `comment` function, and attached attributes as described in the [metadata](#embedded-metadata) section. + +```{r gwlexample, echo=TRUE, eval=FALSE} +siteNumber <- "434400121275801" +groundWater <- readNWISgwl(siteNumber) +``` + + +## Peak Flow Data + +Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the `readNWISpeak` function. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +```{r peakexample, echo=TRUE, eval=FALSE} +siteNumber <- '01594440' +peakData <- readNWISpeak(siteNumber) + +``` + + +## Rating Curve Data + +Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +Rating curves can be obtained with the `readNWISrating` function. + +```{r ratingexample, echo=TRUE, eval=FALSE} +ratingData <- readNWISrating(siteNumber, "base") +attr(ratingData, "RATING") + +``` + + +## Surface-Water Measurement Data + +These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +Surface-water measurement data can be obtained with the `readNWISmeas` function. + +```{r surfexample, echo=TRUE, eval=FALSE} +surfaceData <- readNWISmeas(siteNumber) + +``` + + +## Water Use Data +Retrieves water use data from USGS Water Use Data for the Nation. See [https://waterdata.usgs.gov/nwis/wu](https://waterdata.usgs.gov/nwis/wu) for more information. All available use categories for the supplied arguments are retrieved. + +```{r eval=FALSE} +allegheny <- readNWISuse(stateCd = "Pennsylvania", + countyCd = "Allegheny") + + +national <- readNWISuse(stateCd = NULL, + countyCd = NULL, + transform = TRUE) + +``` + +## Statistics Data +Retrieves site statistics from the USGS Statistics Web Service beta. + +```{r eval=FALSE} +discharge_stats <- readNWISstat(siteNumbers=c("02319394"), + parameterCd=c("00060"), + statReportType="annual") + +``` + + +# Water Quality Portal Web Retrievals + +There are additional water quality data sets available from the [Water Quality Data Portal](https://www.waterqualitydata.us/). These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included in the future. Because only USGS uses parameter codes, a "characteristic name" must be supplied. The `readWQPqw` function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. + + +```{r label=getQWData, echo=TRUE, eval=FALSE} +specificCond <- readWQPqw('WIDNR_WQX-10032762', + 'Specific conductance', + '2011-05-01','2011-09-30') +``` + +A tool for finding NWIS characteristic names can be found [here](https://www.waterqualitydata.us/public_srsnames/) + + +# Generalized Retrievals + +The previous examples all took specific input arguments: `siteNumber`, `parameterCd` (or characteristic name), `startDate`, `endDate`, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. + +## NWIS + +### Sites: whatNWISsites + +The function `whatNWISsites` can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the `...` argument, which allows the user to use any arbitrary input argument. We can then use the service [here](https://waterservices.usgs.gov/rest/Site-Test-Tool.html) to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is: + +[https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv](https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv) + +The following `dataRetrieval` code can be used to get those sites: + +```{r siteSearch, eval=FALSE} +sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), + parameterCd=c("00010","00060"), + hasDataTypeCd="dv") +``` + +### Data: readNWISdata + +For NWIS data, the function `readNWISdata` can be used. The argument listed in the R help file is `...` and `service` (only for data requests). Table 5 describes the services are available. + +```{r echo=FALSE} + +Service <- c("dv","iv","gwlevels","qwdata","measurements","peak","stat") +Description <- c("Daily","Instantaneous","Groundwater Levels","Water Quality","Surface Water Measurements","Peak Flow","Statistics Service") +URL <- c("https://waterservices.usgs.gov/rest/DV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/IV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html", + "https://nwis.waterdata.usgs.gov/nwis/qwdata", + "https://waterdata.usgs.gov/nwis/measurements/", + "https://nwis.waterdata.usgs.gov/usa/nwis/peak/", + "https://waterservices.usgs.gov/rest/Statistics-Service-Test-Tool.html") + +tableData <- data.frame(Service, + Description, + URL, + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 5: NWIS general data calls", + rnames=FALSE, align=c("l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + + +The `...` argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi2, and the most recent daily discharge data. That request would be done as follows: + +```{r dataExample, eval=FALSE} +dischargeWI <- readNWISdata(service="dv", + stateCd="WI", + parameterCd="00060", + drainAreaMin="50", + statCd="00003") + +siteInfo <- attr(dischargeWI, "siteInfo") + +``` + + +## WQP + +Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found [here](https://www.waterqualitydata.us/webservices_documentation). + +### Sites: whatWQPsites + +To discover available sites in the WQP in New Jersey that have measured Chloride, use the function `whatWQPsites`. + +```{r NJChloride, eval=FALSE} + +sitesNJ <- whatWQPsites(statecode="US:34", + characteristicName="Chloride") + +``` + + +### Data: readWQPdata + +To get data from the WQP using generalized Web service calls, use the function `readWQPdata`. For example, to get all the pH data in Wisconsin: + +```{r phData, eval=FALSE} +dataPH <- readWQPdata(statecode="US:55", + characteristicName="pH") +``` + + +### Availability: whatWQPdata + +The function `whatWQPdata` returns a data frame with information on the amount of data collected at a site. For example: + +```{r eval=FALSE} +type <- "Stream" +sites <- whatWQPdata(countycode="US:55:025",siteType=type) +``` + +This returns a data frame with all of the sites that were measured in streams in Dane County, WI. Also, in that table, there is a measure of `activityCount` (how often the site was sampled), and `resultCount` (how many individual results are available). + +### Samples: whatWQPsamples + +The function `whatWQPsamples` returns information on the individual samples collected at a site. For example: + +```{r eval=FALSE} +site <- whatWQPsamples(siteid="USGS-01594440") +``` + +This returns one row for each instance that a sample was collect. + +### Metrics: whatWQPmetrics + +The function `whatWQPmetrics` provides metric information. This is only currently available for STORET data: + +```{r eval=FALSE} +type <- "Stream" +sites <- whatWQPmetrics(countycode="US:55:025",siteType=type) +``` + + +# Embedded Metadata + +All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a `url` (returning a character of the url used to obtain the data), `siteInfo` (returning a data frame with information on sites), and `queryTime` (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows: + +```{r meta1, eval=FALSE} + +attr(dischargeWI, "url") + +attr(dischargeWI, "queryTime") + +siteInfo <- attr(dischargeWI, "siteInfo") + +``` + +Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes: + +```{r meta2, eval=FALSE} + +names(attributes(dischargeWI)) + +``` + +For data obtained from `readNWISuv`, `readNWISdv`, `readNWISgwl` there are two attributes that are particularly useful: `siteInfo` and `variableInfo`. + +```{r meta3, eval=FALSE} + +siteInfo <- attr(dischargeWI, "siteInfo") + +variableInfo <- attr(dischargeWI, "variableInfo") + + +``` + +Data obtained from `readNWISpeak`, `readNWISmeas`, and `readNWISrating`, the `comment` attribute is useful. + +```{r meta5, eval=FALSE} +comment(peakData) + +#Which is equivalent to: +attr(peakData, "comment") +``` + + +# Getting Started in R + +This section describes the options for downloading and installing the `dataRetrieval` package. + +## New to R? + +If you are new to R, you will need to first install the latest version of R, which can be found [here] (www.R-project.org). + +At any time, you can get information about any function in R by typing a question mark before the functions name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. This will open a help file similar to the image below. To see the raw code for a particular code, type the name of the function, without parentheses. + + +``` +?readNWISpCode +``` + + +![A simple R help file](Rhelp.png) + + +Additionally, many R packages have vignette files attached (such as this paper). To view the vignette: +```{r seeVignette,eval = FALSE} +vignette(dataRetrieval) +``` + + +## R User: Installing dataRetrieval + +The following command installs `dataRetrieval` and subsequent required packages: + +```{r installFromCran,eval = FALSE} +install.packages("dataRetrieval") +``` + +After installing the package, you need to open the library each time you re-start R. This is done with the simple command: + +```{r openLibraryTest, eval=FALSE} +library(dataRetrieval) +``` + + + +# Creating Tables in Microsoft® Software from R + +There are a few steps that are required in order to create a table in Microsoft® software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData: + +```{r label=getSiteApp, echo=TRUE, eval=FALSE} +availableData <- whatNWISdata(siteNumber, "dv") +dailyData <- availableData["00003" == availableData$stat_cd,] + +tableData <- with(dailyData, + data.frame( + shortName=srsname, + Start=begin_date, + End=end_date, + Count=count_nu, + Units=parameter_units) + ) +``` + +First, save the data frame as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements): + + +```{r label=saveData, echo=TRUE, eval=FALSE} +write.table(tableData, file="tableData.tsv",sep="\t", + row.names = FALSE,quote=FALSE) +``` + +This will save a file in your working directory called tableData.tsv. You can see your working directory by typing `getwd()` in the R console. Opening the file in a general-purpose text editor, you should see the following: + +``` +shortName Start End Count Units +Temperature, water 2010-10-01 2012-06-24 575 deg C +Stream flow, mean. daily 1948-01-01 2013-03-13 23814 ft3/s +Specific conductance 2010-10-01 2012-06-24 551 uS/cm 25C +Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +``` + +Next, follow the steps below to open this file in Excel: + +1. Open Excel +2. Click on the File tab +3. Click on the Open option +4. Navigate to the working directory (as shown in the results of `getwd()`) +5. Next to the File name text box, change the drop down type to All Files (*.*) +6. Double click tableData.tsv +7. A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. +8. In the second window, click on the Tab delimiter if it is not automatically checked, then click Finished. +9. Use the many formatting tools within Excel to customize the table + +From Excel, it is simple to copy and paste the tables in other Microsoft® software. An example using one of the default Excel table formats is here. Additional formatting could be required in Excel, for example converting u to $\mu$. + +![A simple table produced in Microsoft ® Excel.](table1.png) + + +# Disclaimer +This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information. + + diff --git a/inst/doc/dataRetrieval.Rnw b/inst/doc/dataRetrieval.Rnw deleted file mode 100644 index 16dfa930..00000000 --- a/inst/doc/dataRetrieval.Rnw +++ /dev/null @@ -1,960 +0,0 @@ -%\VignetteIndexEntry{Introduction to the dataRetrieval package} -%\VignetteEngine{knitr::knitr} -%\VignetteDepends{} -%\VignetteSuggests{xtable, testthat} -%\VignetteImports{XML, httr, reshape2,lubridate,utils,stats} -%\VignettePackage{dataRetrieval} - -\documentclass[a4paper,11pt]{article} - -\usepackage{amsmath} -\usepackage{times} -\usepackage{hyperref} -\usepackage[numbers, round]{natbib} -\usepackage[american]{babel} -\usepackage{authblk} -\usepackage{subfig} -\usepackage{placeins} -\usepackage{footnote} -\usepackage{tabularx} -\usepackage{threeparttable} -\usepackage{parskip} - -\usepackage{csquotes} -\usepackage{setspace} - -% \doublespacing - -\renewcommand{\topfraction}{0.85} -\renewcommand{\textfraction}{0.1} -\usepackage{graphicx} - - -\usepackage{mathptmx}% Times Roman font -\usepackage[scaled=.90]{helvet}% Helvetica, served as a model for arial - -% \usepackage{indentfirst} -% \setlength\parindent{20pt} -\setlength{\parskip}{0pt} - -\usepackage{courier} - -\usepackage{titlesec} -\usepackage{titletoc} - -\titleformat{\section} - {\normalfont\sffamily\bfseries\LARGE} - {\thesection}{0.5em}{} -\titleformat{\subsection} - {\normalfont\sffamily\bfseries\Large} - {\thesubsection}{0.5em}{} -\titleformat{\subsubsection} - {\normalfont\sffamily\large} - {\thesubsubsection}{0.5em}{} - -\titlecontents{section} -[2em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{subsection} -[4.6em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{subsubsection} -[6.9em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{table} -[0em] % adjust left margin -{\sffamily} % font formatting -{Table\hspace*{2em} \contentslabel {2em}} % section label and offset -{\hspace*{4em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{figure} -[0em] % adjust left margin -{\sffamily} % font formatting -{Figure\hspace*{2em} \contentslabel {2em}} % section label and offset -{\hspace*{4em}} -{\titlerule*[0.25pc]{.}\contentspage} - -%Italisize and change font of urls: -\urlstyle{sf} -\renewcommand\UrlFont\itshape - -\usepackage{caption} -\captionsetup{ - font={sf}, - labelfont={bf,sf}, - labelsep=period, - justification=justified, - singlelinecheck=false -} - - - -\textwidth=6.2in -\textheight=8.5in -\parskip=.3cm -\oddsidemargin=.1in -\evensidemargin=.1in -\headheight=-.3in - - -%------------------------------------------------------------ -% newcommand -%------------------------------------------------------------ -\newcommand{\scscst}{\scriptscriptstyle} -\newcommand{\scst}{\scriptstyle} -\newcommand{\Robject}[1]{{\texttt{#1}}} -\newcommand{\Rfunction}[1]{{\texttt{#1}}} -\newcommand{\Rclass}[1]{\textit{#1}} -\newcommand{\Rpackage}[1]{\textit{#1}} -\newcommand{\Rexpression}[1]{\texttt{#1}} -\newcommand{\Rmethod}[1]{{\texttt{#1}}} -\newcommand{\Rfunarg}[1]{{\texttt{#1}}} - -\begin{document} - -<>= -library(xtable) -options(continue=" ") -options(width=60) -library(knitr) - -@ - -\renewenvironment{knitrout}{\begin{singlespace}}{\end{singlespace}} -\renewcommand*\listfigurename{Figures} - -\renewcommand*\listtablename{Tables} - - -%------------------------------------------------------------ -\title{The dataRetrieval R package} -%------------------------------------------------------------ -\author[1]{Laura A. De Cicco} -\author[1]{Robert M. Hirsch} -\affil[1]{United States Geological Survey} - - -<>= -opts_chunk$set(highlight=TRUE, tidy=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE,comment="") -knit_hooks$set(inline = function(x) { - if (is.numeric(x)) round(x, 3)}) -knit_hooks$set(crop = hook_pdfcrop) - -bold.colHeaders <- function(x) { - x <- gsub("\\^(\\d)","$\\^\\1$",x) - x <- gsub("\\%","\\\\%",x) - x <- gsub("\\_"," ",x) - returnX <- paste("\\multicolumn{1}{c}{\\textbf{\\textsf{", x, "}}}", sep = "") -} -addSpace <- function(x) ifelse(x != "1", "[5pt]","") -library(dataRetrieval) -@ - -\noindent{\huge\textsf{\textbf{The dataRetrieval R package}}} - -\noindent\textsf{By Laura A. De Cicco and Robert M. Hirsch} - -\noindent\textsf{\today} - -% \maketitle -% -% \newpage - -\tableofcontents -\listoffigures -\listoftables - -\newpage - -%------------------------------------------------------------ -\section{Introduction to dataRetrieval} -%------------------------------------------------------------ -The dataRetrieval package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). - -For information on getting started in R and installing the package, see (\ref{sec:appendix1}): Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. - -A quick workflow for USGS dataRetrieval functions: - -<>= -library(dataRetrieval) -# Choptank River near Greensboro, MD -siteNumber <- "01491000" -ChoptankInfo <- readNWISsite(siteNumber) -parameterCd <- "00060" - -#Raw daily data: -rawDailyData <- readNWISdv(siteNumber,parameterCd, - "1980-01-01","2010-01-01") - -# Sample data Nitrate: -parameterCd <- "00618" -qwData <- readNWISqw(siteNumber,parameterCd, - "1980-01-01","2010-01-01") - -pCode <- readNWISpCode(parameterCd) - -@ - -USGS data are made available through the National Water Information System (NWIS). - -Table \ref{tab:func} describes the functions available in the dataRetrieval package. - -\begin{table}[!ht] -\begin{minipage}{\linewidth} -{\footnotesize -\caption{dataRetrieval functions} -\label{tab:func} -\begin{tabular}{lll} - \hline -\multicolumn{1}{c}{\textbf{\textsf{Function Name}}} & -\multicolumn{1}{c}{\textbf{\textsf{Arguments}}} & -\multicolumn{1}{c}{\textbf{\textsf{Description}}} \\ [0pt] - \hline - \texttt{readNWISdata} & \texttt{...} & NWIS data using user-specified queries\\ - & service & \\ - [5pt]\texttt{readNWISdv} & siteNumber & NWIS daily data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - & statCd & \\ - [5pt]\texttt{readNWISqw} & siteNumber & NWIS water quality data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - & expanded & \\ - [5pt]\texttt{readNWISuv} & siteNumber & NWIS instantaneous value data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISrating} & siteNumber & NWIS rating table for active streamgage \\ - & type & \\ - [5pt]\texttt{readNWISmeas} & siteNumber & NWIS surface-water measurements \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISpeak} & siteNumber & NWIS peak flow data \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISgwl} & siteNumber & NWIS groundwater level measurements \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISpCode} & parameterCd & NWIS parameter code information\\ - [5pt]\texttt{readNWISsite} & siteNumber & NWIS site information \\ - [5pt]\texttt{whatNWISsites} & \texttt{...} & NWIS site search using user-specified queries \\ - [5pt]\texttt{whatNWISdata} & siteNumber & NWIS data availability, including period of record and count \\ - & service & \\ - [5pt]\texttt{readWQPdata} & \texttt{...} & WQP data using user-specified queries \\ - [5pt]\texttt{readWQPqw} & siteNumber & WQP data \\ - & parameterCd (or characteristic name) & \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{whatWQPsites} & \texttt{...} & WQP site search using user-specified queries \\ - \hline -\end{tabular} -} -\end{minipage} -\end{table} - -\clearpage - -%------------------------------------------------------------ -\section{USGS Web Retrievals} -\label{sec:genRetrievals} -%------------------------------------------------------------ -In this section, examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values(\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), water quality data (\ref{sec:usgsWQP}), groundwater level data (\ref{sec:gwl}), peak flow data (\ref{sec:peak}), rating curve data (\ref{sec:rating}, and surface-water measurement data (\ref{sec:meas}). Section \ref{sec:metadata} shows instructions for getting metadata that is attached to each returned data frame. - -The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the dataRetrieval package as \enquote{siteNumber}). Often (but not always), these ID's are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this siteNumber. There are many ways to do this, one is the National Water Information System: Mapper \url{https://maps.waterdata.usgs.gov/mapper/index.html}. - -Once the siteNumber is known, the next required input for USGS data retrievals is the \enquote{parameter code}. This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents \enquote{Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen}, with units of \enquote{mg/l as N}. - -Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table \ref{tab:params}. - -<>= -pCode <- c('00060', '00065', '00010','00045','00400') -shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") - -data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) - -print(xtable(data.df, - label="tab:params", - caption="Common USGS Parameter Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.text.function = function(x) {x}, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -Two output columns that may not be obvious are \enquote{srsname} and \enquote{casrn}. Srsname stands for \enquote{Substance Registry Services}. More information on the srs name can be found here: - -\url{http://ofmpub.epa.gov/sor_internet/registry/substreg/home/overview/home.do} - -Casrn stands for \enquote{Chemical Abstracts Service (CAS) Registry Number}. More information on CAS can be found here: - -\url{http://www.cas.org/content/chemical-substances/faqs} - -For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and siteNumber is enough to make a request for data. For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code. - -Some common codes are shown in Table \ref{tab:stat}. - -<>= -StatCode <- c('00001', '00002', '00003','00008') -shortName <- c("Maximum","Minimum","Mean", "Median") - -data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) - -print(xtable(data.df,label="tab:stat", - caption="Commonly used USGS Stat Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -Examples for using these siteNumbers, parameter codes, and stat codes will be presented in subsequent sections. - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Site Information} -\label{sec:usgsSite} -%------------------------------------------------------------ - -%------------------------------------------------------------ -\subsubsection{readNWISsite} -\label{sec:usgsSiteFileData} -%------------------------------------------------------------ -Use the \texttt{readNWISsite} function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. \texttt{readNWISsite} can also access information about multiple sites with a vector input. - - -<>= -siteNumbers <- c("01491000","01645000") -siteINFO <- readNWISsite(siteNumbers) -@ - -Site information is obtained from: -\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html} - -Information on the returned data can be found with the \texttt{comment} function as described in section \ref{sec:metadata}. - -<>= -comment(siteINFO) -@ - - - -\FloatBarrier - -%------------------------------------------------------------ -\subsubsection{whatNWISdata} -\label{sec:usgsDataAvailability} -%------------------------------------------------------------ -To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{whatNWISdata} function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"}, \texttt{"}rt\texttt{"}, or \texttt{"}iv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (water-quality), \texttt{"}sv\texttt{"} (sites visits), \texttt{"}pk\texttt{"} (peak measurements), \texttt{"}gw\texttt{"} (groundwater levels), \texttt{"}ad\texttt{"} (sites included in USGS Annual Water Data Reports External Link), \texttt{"}aw\texttt{"} (sites monitored by the USGS Active Groundwater Level Network External Link), and \texttt{"}id\texttt{"} (historical instantaneous values). - -In the following example, we limit the retrieved data to only daily data. The default for \texttt{"}service\texttt{"} is \enquote{all}, which returns all of the available data for that site. Likewise, there are arguments for parameter code (\texttt{parameterCd}) and statistic code (\texttt{statCd}) to filter the results. The default for both is to return all possible values (\enquote{all}). The returned \texttt{"}count\_nu\texttt{"} for \texttt{"}uv\texttt{"} data is the count of days with returned data, not the actual count of returned values. - - -<>= -# Continuing from the previous example: -# This pulls out just the daily, mean data: - -dailyDataAvailable <- whatNWISdata(siteNumbers, - service="dv", statCd="00003") - - -@ - -<>= -tableData <- with(dailyDataAvailable, - data.frame( - siteNumber= site_no, - srsname=srsname, - startDate=as.character(begin_date), - endDate=as.character(end_date), - count=as.character(count_nu), - units=parameter_units, -# statCd = stat_cd, - stringsAsFactors=FALSE) - ) - -tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s" -tableData$units[which(tableData$units == "uS/cm @25C")] <- "$\\mu$S/cm @25C" - - -print(xtable(tableData,label="tab:gda", - caption="Reformatted version of output from \\texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.text.function = function(x) {x}, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -\begin{table}[ht] -\caption{Reformatted version of output from \texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]} -\label{tab:gda} -{\footnotesize -\begin{tabular}{rllllll} - \hline - & \multicolumn{1}{c}{\textbf{\textsf{siteNumber}}} & \multicolumn{1}{c}{\textbf{\textsf{srsname}}} & \multicolumn{1}{c}{\textbf{\textsf{startDate}}} & \multicolumn{1}{c}{\textbf{\textsf{endDate}}} & \multicolumn{1}{c}{\textbf{\textsf{count}}} & \multicolumn{1}{c}{\textbf{\textsf{units}}} \\ - \hline - & 01491000 & Temperature, water & 2010-10-01 & 2012-05-09 & 529 & deg C \\ - [5pt] & 01645000 & Stream flow, mean. daily & 1930-09-26 & 2015-02-19 & 30828 & ft$^3$/s \\ - [5pt] & 01491000 & Stream flow, mean. daily & 1948-01-01 & 2015-02-19 & 24522 & ft$^3$/s \\ - [5pt] & 01491000 & Specific conductance & 2010-10-01 & 2012-05-09 & 527 & $\mu$S/cm @25C \\ - [5pt] & 01491000 & Suspended sediment concentration (SSC) & 1980-10-01 & 1991-09-30 & 3651 & mg/l \\ - [5pt] & 01491000 & Suspended sediment discharge & 1980-10-01 & 1991-09-30 & 3652 & tons/day \\ - \hline -\end{tabular} -} -\end{table} - -See Section \ref{app:createWordTable} for instructions on converting an R data frame to a table in Microsoft\textregistered\ software Excel or Word to display a data availability table similar to Table \ref{tab:gda}. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries. - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Parameter Information} -\label{sec:usgsParams} -%------------------------------------------------------------ -To obtain all of the available information concerning a measured parameter (or multiple parameters), use the \texttt{readNWISpCode} function: - -<>= -# Using defaults: -parameterCd <- "00618" -parameterINFO <- readNWISpCode(parameterCd) -@ - - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Daily Data} -\label{sec:usgsDaily} -%------------------------------------------------------------ -To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to \texttt{"}00003\texttt{"}). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}. - -The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. - -<>= - -# Choptank River near Greensboro, MD: -siteNumber <- "01491000" -parameterCd <- "00060" # Discharge -startDate <- "2009-10-01" -endDate <- "2012-09-30" - -discharge <- readNWISdv(siteNumber, - parameterCd, startDate, endDate) -@ - -The column \texttt{"}datetime\texttt{"} in the returned data frame is automatically imported as a variable of class \texttt{"}Date\texttt{"} in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often \texttt{"}A\texttt{"} (approved for publication) or \texttt{"}P\texttt{"} (provisional data subject to revision). - -Another example would be a request for mean and maximum daily temperature and discharge in early 2012: - -<>= -siteNumber <- "01491000" -parameterCd <- c("00010","00060") # Temperature and discharge -statCd <- c("00001","00003") # Mean and maximum -startDate <- "2012-01-01" -endDate <- "2012-05-01" - -temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, - startDate, endDate, statCd=statCd) - -@ - -<>= -filePath <- system.file("extdata", package="dataRetrieval") -fileName <- "temperatureAndFlow.RData" -fullPath <- file.path(filePath, fileName) -load(fullPath) - -@ - -The column names can be shortened and simplified using the \texttt{renameNWISColumns} function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. - - -<>= -names(temperatureAndFlow) - -temperatureAndFlow <- renameNWISColumns(temperatureAndFlow) -names(temperatureAndFlow) - -@ - -<>= -#Information about the data frame attributes: -names(attributes(temperatureAndFlow)) - -statInfo <- attr(temperatureAndFlow, "statisticInfo") -variableInfo <- attr(temperatureAndFlow, "variableInfo") -siteInfo <- attr(temperatureAndFlow, "siteInfo") - -@ - - - -An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}): - -<>= -variableInfo <- attr(temperatureAndFlow, "variableInfo") -siteInfo <- attr(temperatureAndFlow, "siteInfo") - -par(mar=c(5,5,5,5)) #sets the size of the plot window - -plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max, - ylab=variableInfo$parameter_desc[1],xlab="" ) -par(new=TRUE) -plot(temperatureAndFlow$Date, temperatureAndFlow$Flow, - col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE - ) -axis(4,col="red",col.axis="red") -mtext(variableInfo$parameter_desc[2],side=4,line=3,col="red") -title(paste(siteInfo$station_nm,"2012")) -legend("topleft", variableInfo$param_units, - col=c("black","red"),lty=c(NA,1),pch=c(1,NA)) -@ - - -There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns). - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Unit Data} -\label{sec:usgsRT} -%------------------------------------------------------------ -Any data collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values.} Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{readNWISuv}. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. - -<>= - -parameterCd <- "00060" # Discharge -startDate <- "2012-05-12" -endDate <- "2012-05-13" -dischargeUnit <- readNWISuv(siteNumber, parameterCd, - startDate, endDate) -dischargeUnit <- renameNWISColumns(dischargeUnit) -@ - -The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the startDate through the endDate (starting and ending with midnight locally-collected time). The dateTime column is converted to \enquote{UTC} (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm). - -To override the UTC timezone, specify a valid timezone in the tz argument. Default is \texttt{""}, which will keep the dateTime column in UTC. Other valid timezones are: - -\begin{verbatim} -America/New_York -America/Chicago -America/Denver -America/Los_Angeles -America/Anchorage -America/Honolulu -America/Jamaica -America/Managua -America/Phoenix -America/Metlakatla -\end{verbatim} - -Data are retrieved from \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. - -\newpage - - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Water Quality Data} -\label{sec:usgsWQP} -%------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, and endDate. Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There also includes an argument \texttt{"}reshape\texttt{"}, that converts the expanded dataset to a \texttt{"}wide\texttt{"} format (each requested parameter code gets individual columns). The defaults are expanded=TRUE, and reshape=FALSE. - -<>= - -# Dissolved Nitrate parameter codes: -parameterCd <- c("00618","71851") -startDate <- "1985-10-01" -endDate <- "2012-09-30" - -dfLong <- readNWISqw(siteNumber, parameterCd, - startDate, endDate) - -# Or the wide return: -# dfWide <- readNWISqw(siteNumber, parameterCd, -# startDate, endDate, reshape=TRUE) - -@ - -Site information and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. Additional metadata, such as information about the column names can be found by using the \texttt{comment} function, also described in section \ref{sec:metadata}. - -<>= - -comment(dfLong) - -@ - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Groundwater Level Data} -\label{sec:gwl} -%------------------------------------------------------------ -Groundwater level measurements can be obtained with the \texttt{readNWISgwl} function. Information on the returned data can be found with the \texttt{comment} function, and attached attributes as described in section \ref{sec:metadata}. - -<>= -siteNumber <- "434400121275801" -groundWater <- readNWISgwl(siteNumber) -@ - -%------------------------------------------------------------ -\subsection{Peak Flow Data} -\label{sec:peak} -%------------------------------------------------------------ - -Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the \texttt{readNWISpeak} function. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -<>= -siteNumber <- '01594440' -peakData <- readNWISpeak(siteNumber) - -@ - - -%------------------------------------------------------------ -\subsection{Rating Curve Data} -\label{sec:rating} -%------------------------------------------------------------ -Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -Rating curves can be obtained with the \texttt{readNWISrating} function. - -<>= -ratingData <- readNWISrating(siteNumber, "base") -attr(ratingData, "RATING") - -@ - - - -%------------------------------------------------------------ -\subsection{Surface-Water Measurement Data} -\label{sec:meas} -%------------------------------------------------------------ -These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -Surface-water measurement data can be obtained with the \texttt{readNWISmeas} function. - -<>= -surfaceData <- readNWISmeas(siteNumber) - -@ - - -%------------------------------------------------------------ -\section{Water Quality Portal Web Retrievals} -\label{sec:usgsSTORET} -%------------------------------------------------------------ -There are additional water quality data sets available from the Water Quality Data Portal (\url{https://www.waterqualitydata.us/}). These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included in the future. Because only USGS uses parameter codes, a \texttt{"}characteristic name\texttt{"} must be supplied. The \texttt{readWQPqw} function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. - - -<>= -specificCond <- readWQPqw('WIDNR_WQX-10032762', - 'Specific conductance','2011-05-01','2011-09-30') -@ - -A tool for finding NWIS characteristic names can be found at: - -\url{https://www.waterqualitydata.us/public_srsnames/} - -\FloatBarrier - -%------------------------------------------------------------ -\section{Generalized Retrievals} -\label{sec:general} -%------------------------------------------------------------ -The previous examples all took specific input arguments: siteNumber, parameterCd (or characteristic name), startDate, endDate, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. - -%------------------------------------------------------------ -\subsubsection{NWIS Sites} -\label{sec:NWISGenSite} -%------------------------------------------------------------ -The function \texttt{whatNWISsites} can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the \texttt{"..."} argument, which allows the user to use any arbitrary input argument. We can then use the service here: - -\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html} - -to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is: - -\url{https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv} - -The following dataRetrieval code can be used to get those sites: - -<>= -sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), - parameterCd=c("00010","00060"), - hasDataTypeCd="dv") -@ - - -%------------------------------------------------------------ -\subsubsection{NWIS Data} -\label{sec:NWISGenData} -%------------------------------------------------------------ -For NWIS data, the function \texttt{readNWISdata} can be used. The argument listed in the R help file is \texttt{"..."} and \texttt{"}service\texttt{"} (only for data requests). Table \ref{tab:NWISGeneral} describes the services are available. - -\begin{table}[!ht] -\begin{minipage}{\linewidth} -{\footnotesize -\caption{NWIS general data calls} -\label{tab:NWISGeneral} -\begin{tabular}{lll} - \hline -\multicolumn{1}{c}{\textbf{\textsf{Service}}} & -\multicolumn{1}{c}{\textbf{\textsf{Description}}} & -\multicolumn{1}{c}{\textbf{\textsf{Reference URL}}} \\ [0pt] - \hline - daily values & dv & \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}\\ - [5pt]instantaneous & iv & \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}\\ - [5pt]groundwater levels & gwlevels & \url{https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html}\\ - [5pt]water quality & qwdata & \url{https://nwis.waterdata.usgs.gov/nwis/qwdata}\\ - \hline -\end{tabular} -} -\end{minipage} -\end{table} - -The \texttt{"..."} argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi$^2$, and the most recent daily dischage data. That request would be done as follows: - -<>= -dischargeWI <- readNWISdata(service="dv", - stateCd="WI", - parameterCd="00060", - drainAreaMin="50", - statCd="00003") - -siteInfo <- attr(dischargeWI, "siteInfo") - -@ - -%------------------------------------------------------------ -\subsubsection{WQP Sites} -\label{sec:WQPGenSite} -%------------------------------------------------------------ - -Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found here: - -\url{https://www.waterqualitydata.us/webservices_documentation} - -To discover available sites in the WQP in New Jersey that have measured Chloride, use the function \texttt{whatWQPsites}. - -<>= - -sitesNJ <- whatWQPsites(statecode="US:34", - characteristicName="Chloride") - -@ - - -%------------------------------------------------------------ -\subsubsection{WQP Data} -\label{sec:WQPGenData} -%------------------------------------------------------------ -Finally, to get data from the WQP using generalized Web service calls, use the function \texttt{readWQPdata}. For example, to get all the pH data in Wisconsin: - -<>= - -dataPH <- readWQPdata(statecode="US:55", - characteristicName="pH") - -@ - - - -\FloatBarrier - -\clearpage - -%------------------------------------------------------------ -\section{Embedded Metadata} -\label{sec:metadata} -%------------------------------------------------------------ -All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a \texttt{url} (returning a character of the url used to obtain the data), \texttt{siteInfo} (returning a data frame with information on sites), and \texttt{queryTime} (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows: - -<>= - -attr(dischargeWI, "url") - -attr(dischargeWI, "queryTime") - -siteInfo <- attr(dischargeWI, "siteInfo") - -@ - -Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes: - -<>= - -names(attributes(dischargeWI)) - -@ - -For data obtained from \texttt{readNWISuv}, \texttt{readNWISdv}, \texttt{readNWISgwl} there are two attributes that are particularly useful: \texttt{siteInfo} and \texttt{variableInfo}. - -<>= - -siteInfo <- attr(dischargeWI, "siteInfo") - -variableInfo <- attr(dischargeWI, "variableInfo") - - -@ - -Data obtained from \texttt{readNWISpeak}, \texttt{readNWISmeas}, and \texttt{readNWISrating}, the \texttt{comment} attribute is useful. - -<>= -comment(peakData) - -#Which is equivalent to: -# attr(peakData, "comment") -@ - - - -%------------------------------------------------------------ -\section{Getting Started in R} -\label{sec:appendix1} -%------------------------------------------------------------ -This section describes the options for downloading and installing the dataRetrieval package. - -%------------------------------------------------------------ -\subsection{New to R?} -%------------------------------------------------------------ -If you are new to R, you will need to first install the latest version of R, which can be found here: \url{http://www.r-project.org/}. - -At any time, you can get information about any function in R by typing a question mark before the functions name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. This will open a help file similar to Figure \ref{fig:help}. To see the raw code for a particular code, type the name of the function, without parentheses. - - -<>= -?readNWISpCode -@ - -\FloatBarrier - - -\begin{figure}[ht!] -\centering - \resizebox{0.95\textwidth}{!}{\includegraphics{Rhelp.png}} -\caption{A simple R help file} -\label{fig:help} -\end{figure} - -Additionally, many R packages have vignette files attached (such as this paper). To view the vignette: -<>= -vignette(dataRetrieval) -@ - -\FloatBarrier -\clearpage -%------------------------------------------------------------ -\subsection{R User: Installing dataRetrieval} -%------------------------------------------------------------ -The following command installs dataRetrieval and subsequent required packages: - -<>= -install.packages("dataRetrieval") -@ - -After installing the package, you need to open the library each time you re-start R. This is done with the simple command: -<>= -library(dataRetrieval) -@ - - -%------------------------------------------------------------ -\section{Creating Tables in Microsoft\textregistered\ Software from R} -\label{app:createWordTable} -%------------------------------------------------------------ -There are a few steps that are required in order to create a table in Microsoft\textregistered\ software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData: - -<>= -availableData <- whatNWISdata(siteNumber, "dv") -dailyData <- availableData["00003" == availableData$stat_cd,] - -tableData <- with(dailyData, - data.frame( - shortName=srsname, - Start=begin_date, - End=end_date, - Count=count_nu, - Units=parameter_units) - ) -@ - -First, save the data frame as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements): - - -<>= -write.table(tableData, file="tableData.tsv",sep="\t", - row.names = FALSE,quote=FALSE) -@ - -This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following: - -\begin{verbatim} -shortName Start End Count Units -Temperature, water 2010-10-01 2012-06-24 575 deg C -Stream flow, mean. daily 1948-01-01 2013-03-13 23814 ft3/s -Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C -Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l -Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day -\end{verbatim} - -Next, follow the steps below to open this file in Excel: -\begin{enumerate} -\item Open Excel -\item Click on the File tab -\item Click on the Open option -\item Navigate to the working directory (as shown in the results of \texttt{getwd()}) -\item Next to the File name text box, change the dropdown type to All Files (*.*) -\item Double click tableData.tsv -\item A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. -\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finished. -\item Use the many formatting tools within Excel to customize the table -\end{enumerate} - -From Excel, it is simple to copy and paste the tables in other Microsoft\textregistered\ software. An example using one of the default Excel table formats is here. Additional formatting could be requried in Excel, for example converting u to $\mu$. - -\begin{figure}[ht!] -\centering - \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} -\caption{A simple table produced in Microsoft\textregistered\ Excel.} -\label{overflow} -\end{figure} - -\clearpage - -%------------------------------------- -\section{Disclaimer} -%------------------------------------ -This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information. - - -\end{document} diff --git a/inst/doc/dataRetrieval.html b/inst/doc/dataRetrieval.html new file mode 100644 index 00000000..f8237ee1 --- /dev/null +++ b/inst/doc/dataRetrieval.html @@ -0,0 +1,903 @@ + + + + + + + + + + + + + + + +Introduction to the dataRetrieval package + + + + + + + + + + + + + + + + + +

Introduction to the dataRetrieval package

+

Laura A. DeCicco and Robert M. Hirsch

+

19 May, 2017

+ + +
+ +
+ +

The dataRetrieval package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS).

+

For information on getting started in R and installing the package, see Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.

+

A quick workflow for USGS dataRetrieval functions:

+
library(dataRetrieval)
+# Choptank River near Greensboro, MD
+siteNumber <- "01491000"
+ChoptankInfo <- readNWISsite(siteNumber)
+parameterCd <- "00060"
+
+# Raw daily data:
+rawDailyData <- readNWISdv(siteNumber, parameterCd, "1980-01-01", 
+    "2010-01-01")
+
+# Sample data Nitrate:
+parameterCd <- "00618"
+qwData <- readNWISqw(siteNumber, parameterCd, "1980-01-01", "2010-01-01")
+
+pCode <- readNWISpCode(parameterCd)
+

USGS data are made available through the National Water Information System (NWIS).

+

Table 1 describes the functions available in the dataRetrieval package.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Table 1: dataRetrieval functions
Function.NameArgumentsDescription
readNWISdata...NWIS data using user-specified queries
service
readNWISdvsiteNumberNWIS daily data
parameterCd
startDate
endDate
statCd
readNWISqwsiteNumberNWIS water quality data
parameterCd
startDate
endDate
expanded
readNWISuvsiteNumberNWIS instantaneous value data
parameterCd
startDate
endDate
readNWISratingsiteNumberNWIS rating table for active streamgage
type
readNWISmeassiteNumberNWIS surface-water measurements
startDate
endDate
readNWISpeaksiteNumberNWIS peak flow data
startDate
endDate
readNWISgwlsiteNumberNWIS groundwater level measurements
startDate
endDate
readNWISusestateCdNWIS water use
countyCd
years
categories
readNWISstatsiteNumbersNWIS statistical service
parameterCd
startDate
endDate
statReportType
statType
readNWISpCodeparameterCdNWIS parameter code information
readNWISsitesiteNumberNWIS site information
whatNWISsites...NWIS site search using user-specified queries
whatNWISdatasiteNumberNWIS data availability, including period of record and count
service
readWQPdata...WQP data using user-specified queries
readWQPqwsiteNumberWQP data
parameterCd
startDate
endDate
whatWQPsites...WQP site search using user-specified queries
+
+

1 USGS Web Retrievals

+

In this section, examples of National Water Information System (NWIS) retrievals show how to get raw data into R. This data includes site information, measured parameter information, historical daily values, unit values (which include real-time data but can also include other sensor data stored at regular time intervals), water quality data, groundwater level data, peak flow data, rating curve data, surface-water measurement data, water use data, and statistics data. The section Embedded Metadata shows instructions for getting metadata that is attached to each returned data frame.

+

The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the dataRetrieval package as siteNumber). Often (but not always), these ID’s are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this siteNumber. There are many ways to do this, one is the National Water Information System: Mapper.

+

Once the siteNumber is known, the next required input for USGS data retrievals is the “parameter code”. This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents “Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen”, with units of “mg/l as N”.

+

Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table 2.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Table 2: Common USGS Parameter Codes
pCodeshortName
00060Discharge [ft3/s]
00065Gage height [ft]
00010Temperature [C]
00045Precipitation [in]
00400pH
+

Two output columns that may not be obvious are “srsname” and “casrn”. Srsname stands for “Substance Registry Services”. More information on the srs name can be found here.

+

Casrn stands for “Chemical Abstracts Service (CAS) Registry Number”. More information on CAS can be found here.

+

For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and siteNumber is enough to make a request for data. For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code.

+

Some common codes are shown in Table 3.

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+Table 3: Commonly used USGS Stat Codes
StatCodeshortName
00001Maximum
00002Minimum
00003Mean
00008Median
+

Examples for using these site numbers, parameter codes, and statistic codes will be presented in subsequent sections.

+

There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as “Ice”. Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns), unless the user sets an argument convertType to FALSE. In that case, the data is returned as a data frame that is entirely character columns.

+
+

1.1 Site Information

+
+

1.1.1 readNWISsite

+

Use the readNWISsite function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. readNWISsite can also access information about multiple sites with a vector input.

+
siteNumbers <- c("01491000", "01645000")
+siteINFO <- readNWISsite(siteNumbers)
+

Site information is obtained from: https://waterservices.usgs.gov/rest/Site-Test-Tool.html

+

Information on the returned data can be found with the comment function as described in the Metadata section.

+
comment(siteINFO)
+
+
+

1.1.2 whatNWISdata

+

To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the whatNWISdata function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: “dv” (daily values), “uv”, or “iv” (unit values), “qw” (water-quality), “sv” (sites visits), “pk” (peak measurements), “gw” (groundwater levels), “ad” (sites included in USGS Annual Water Data Reports External Link), “aw” (sites monitored by the USGS Active Groundwater Level Network External Link), and “id” (historical instantaneous values).

+

In the following example, we limit the retrieved data to only daily data. The default for “service” is all, which returns all of the available data for that site. Likewise, there are arguments for parameter code (parameterCd) and statistic code (statCd) to filter the results. The default for both is to return all possible values (all). The returned count_nu for “uv” data is the count of days with returned data, not the actual count of returned values.

+
# Continuing from the previous example: This pulls out just
+# the daily, mean data:
+
+dailyDataAvailable <- whatNWISdata(siteNumbers, service = "dv", 
+    statCd = "00003")
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]
siteNumbersrsnamestartDateendDatecountunits
01491000Temperature, water2010-10-012012-05-09529deg C
01491000Stream flow, mean daily1948-01-012017-05-1725340ft3/s
01645000Stream flow, mean daily1930-09-262017-05-1731646ft3/s
01491000Specific conductance2010-10-012012-05-09527uS/cm @25C
01491000Suspended sediment concentration (SSC)1980-10-011991-09-304017mg/l
01491000Suspended sediment discharge1980-10-011991-09-304017tons/day
+

See Creating Tables for instructions on converting an R data frame to a table in Microsoft® software Excel or Word to display a data availability table similar to Table 4. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries.

+
+
+
+

1.2 Parameter Information

+

To obtain all of the available information concerning a measured parameter (or multiple parameters), use the readNWISpCode function:

+
# Using defaults:
+parameterCd <- "00618"
+parameterINFO <- readNWISpCode(parameterCd)
+
+
+

1.3 Daily Data

+

To obtain daily records of USGS data, use the readNWISdv function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to “00003”). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from https://waterservices.usgs.gov/rest/DV-Test-Tool.html.

+

The dates (start and end) must be in the format “YYYY-MM-DD” (note: the user must include the quotes). Setting the start date to “” (no space) will prompt the program to ask for the earliest date, and setting the end date to “” (no space) will prompt for the latest available date.

+
# Choptank River near Greensboro, MD:
+siteNumber <- "01491000"
+parameterCd <- "00060"  # Discharge
+startDate <- "2009-10-01"
+endDate <- "2012-09-30"
+
+discharge <- readNWISdv(siteNumber, parameterCd, startDate, endDate)
+

The column “datetime” in the returned data frame is automatically imported as a variable of class “Date” in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often “A” (approved for publication) or “P” (provisional data subject to revision).

+

Another example would be a request for mean and maximum daily temperature and discharge in early 2012:

+
siteNumber <- "01491000"
+parameterCd <- c("00010", "00060")  # Temperature and discharge
+statCd <- c("00001", "00003")  # Mean and maximum
+startDate <- "2012-01-01"
+endDate <- "2012-05-01"
+
+temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, startDate, 
+    endDate, statCd = statCd)
+

The column names can be shortened and simplified using the renameNWISColumns function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discussed further in the metadata section.

+
names(temperatureAndFlow)
+
## [1] "agency_cd"        "site_no"          "Date"            
+## [4] "X_00010_00001_cd" "X_00010_00001"    "X_00010_00003_cd"
+## [7] "X_00010_00003"    "X_00060_00003_cd" "X_00060_00003"
+
temperatureAndFlow <- renameNWISColumns(temperatureAndFlow)
+names(temperatureAndFlow)
+
## [1] "agency_cd"    "site_no"      "Date"        
+## [4] "Wtemp_Max_cd" "Wtemp_Max"    "Wtemp_cd"    
+## [7] "Wtemp"        "Flow_cd"      "Flow"
+
# Information about the data frame attributes:
+names(attributes(temperatureAndFlow))
+
## [1] "names"         "row.names"     "url"          
+## [4] "siteInfo"      "variableInfo"  "disclaimer"   
+## [7] "statisticInfo" "queryTime"     "class"
+
statInfo <- attr(temperatureAndFlow, "statisticInfo")
+variableInfo <- attr(temperatureAndFlow, "variableInfo")
+siteInfo <- attr(temperatureAndFlow, "siteInfo")
+

An example of plotting the above data:

+
variableInfo <- attr(temperatureAndFlow, "variableInfo")
+siteInfo <- attr(temperatureAndFlow, "siteInfo")
+
+par(mar = c(5, 5, 5, 5))  #sets the size of the plot window
+
+plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max, ylab = variableInfo$parameter_desc[1], 
+    xlab = "")
+par(new = TRUE)
+plot(temperatureAndFlow$Date, temperatureAndFlow$Flow, col = "red", 
+    type = "l", xaxt = "n", yaxt = "n", xlab = "", ylab = "", 
+    axes = FALSE)
+axis(4, col = "red", col.axis = "red")
+mtext(variableInfo$parameter_desc[2], side = 4, line = 3, col = "red")
+title(paste(siteInfo$station_nm, "2012"))
+legend("topleft", variableInfo$param_units, col = c("black", 
+    "red"), lty = c(NA, 1), pch = c(1, NA))
+

+
+
+

1.4 Unit Data

+

Any data collected at regular time intervals (such as 15-minute or hourly) are known as “unit values”. Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function readNWISuv. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data.

+
parameterCd <- "00060"  # Discharge
+startDate <- "2012-05-12"
+endDate <- "2012-05-13"
+dischargeUnit <- readNWISuv(siteNumber, parameterCd, startDate, 
+    endDate)
+dischargeUnit <- renameNWISColumns(dischargeUnit)
+

The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the startDate through the endDate (starting and ending with midnight locally-collected time). The dateTime column is converted to UTC (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm).

+

To override the UTC timezone, specify a valid timezone in the tz argument. Default is “”, which will keep the dateTime column in UTC. Other valid timezones are:

+
America/New_York
+America/Chicago
+America/Denver
+America/Los_Angeles
+America/Anchorage
+America/Honolulu
+America/Jamaica
+America/Managua
+America/Phoenix
+America/Metlakatla
+

Data are retrieved from https://waterservices.usgs.gov/rest/IV-Test-Tool.html. There are occasions where NWIS values are not reported as numbers, instead a common example is “Ice”. Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in metadata section.

+
+
+

1.5 Water Quality Data

+

To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function readNWISqw, with the input arguments: siteNumber, parameterCd, startDate, and endDate. Additionally, the argument expanded is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There also includes an argument “reshape”, that converts the expanded dataset to a “wide” format (each requested parameter code gets individual columns). The defaults are expanded=TRUE, and reshape=FALSE.

+
# Dissolved Nitrate parameter codes:
+parameterCd <- c("00618", "71851")
+startDate <- "1985-10-01"
+endDate <- "2012-09-30"
+
+dfLong <- readNWISqw(siteNumber, parameterCd, startDate, endDate)
+
+# Or the wide return:
+dfWide <- readNWISqw(siteNumber, parameterCd, startDate, endDate, 
+    reshape = TRUE)
+

Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in the metadata section. Additional metadata, such as information about the column names can be found by using the comment function, also described in the metadata section.

+
comment(dfLong)
+
+
+

1.6 Groundwater Level Data

+

Groundwater level measurements can be obtained with the readNWISgwl function. Information on the returned data can be found with the comment function, and attached attributes as described in the metadata section.

+
siteNumber <- "434400121275801"
+groundWater <- readNWISgwl(siteNumber)
+
+
+

1.7 Peak Flow Data

+

Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the readNWISpeak function. Information on the returned data can be found with the comment function and attached attributes as described in the metadata section.

+
siteNumber <- "01594440"
+peakData <- readNWISpeak(siteNumber)
+
+
+

1.8 Rating Curve Data

+

Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the comment function and attached attributes as described in the metadata section.

+

Rating curves can be obtained with the readNWISrating function.

+
ratingData <- readNWISrating(siteNumber, "base")
+attr(ratingData, "RATING")
+
+
+

1.9 Surface-Water Measurement Data

+

These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the comment function and attached attributes as described in the metadata section.

+

Surface-water measurement data can be obtained with the readNWISmeas function.

+
surfaceData <- readNWISmeas(siteNumber)
+
+
+

1.10 Water Use Data

+

Retrieves water use data from USGS Water Use Data for the Nation. See https://waterdata.usgs.gov/nwis/wu for more information. All available use categories for the supplied arguments are retrieved.

+
allegheny <- readNWISuse(stateCd = "Pennsylvania", countyCd = "Allegheny")
+
+
+national <- readNWISuse(stateCd = NULL, countyCd = NULL, transform = TRUE)
+
+
+

1.11 Statistics Data

+

Retrieves site statistics from the USGS Statistics Web Service beta.

+
discharge_stats <- readNWISstat(siteNumbers = c("02319394"), 
+    parameterCd = c("00060"), statReportType = "annual")
+
+
+
+

2 Water Quality Portal Web Retrievals

+

There are additional water quality data sets available from the Water Quality Data Portal. These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included in the future. Because only USGS uses parameter codes, a “characteristic name” must be supplied. The readWQPqw function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin.

+
specificCond <- readWQPqw("WIDNR_WQX-10032762", "Specific conductance", 
+    "2011-05-01", "2011-09-30")
+

A tool for finding NWIS characteristic names can be found here

+
+
+

3 Generalized Retrievals

+

The previous examples all took specific input arguments: siteNumber, parameterCd (or characteristic name), startDate, endDate, etc. However, the Web services that supply the data can accept a wide variety of additional arguments.

+
+

3.1 NWIS

+
+

3.1.1 Sites: whatNWISsites

+

The function whatNWISsites can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the ... argument, which allows the user to use any arbitrary input argument. We can then use the service here to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is:

+

https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5&parameterCd=00010,00060&hasDataTypeCd=dv

+

The following dataRetrieval code can be used to get those sites:

+
sites <- whatNWISsites(bBox = c(-83, 36.5, -81, 38.5), parameterCd = c("00010", 
+    "00060"), hasDataTypeCd = "dv")
+
+
+

3.1.2 Data: readNWISdata

+

For NWIS data, the function readNWISdata can be used. The argument listed in the R help file is ... and service (only for data requests). Table 5 describes the services are available.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Table 5: NWIS general data calls
ServiceDescriptionURL
dvDailyhttps://waterservices.usgs.gov/rest/DV-Test-Tool.html
ivInstantaneoushttps://waterservices.usgs.gov/rest/IV-Test-Tool.html
gwlevelsGroundwater Levelshttps://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html
qwdataWater Qualityhttps://nwis.waterdata.usgs.gov/nwis/qwdata
measurementsSurface Water Measurementshttps://waterdata.usgs.gov/nwis/measurements/
peakPeak Flowhttps://nwis.waterdata.usgs.gov/usa/nwis/peak/
statStatistics Servicehttps://waterservices.usgs.gov/rest/Statistics-Service-Test-Tool.html
+

The ... argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi2, and the most recent daily discharge data. That request would be done as follows:

+
dischargeWI <- readNWISdata(service = "dv", stateCd = "WI", parameterCd = "00060", 
+    drainAreaMin = "50", statCd = "00003")
+
+siteInfo <- attr(dischargeWI, "siteInfo")
+
+
+
+

3.2 WQP

+

Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found here.

+
+

3.2.1 Sites: whatWQPsites

+

To discover available sites in the WQP in New Jersey that have measured Chloride, use the function whatWQPsites.

+
sitesNJ <- whatWQPsites(statecode = "US:34", characteristicName = "Chloride")
+
+
+

3.2.2 Data: readWQPdata

+

To get data from the WQP using generalized Web service calls, use the function readWQPdata. For example, to get all the pH data in Wisconsin:

+
dataPH <- readWQPdata(statecode = "US:55", characteristicName = "pH")
+
+
+

3.2.3 Availability: whatWQPdata

+

The function whatWQPdata returns a data frame with information on the amount of data collected at a site. For example:

+
type <- "Stream"
+sites <- whatWQPdata(countycode = "US:55:025", siteType = type)
+

This returns a data frame with all of the sites that were measured in streams in Dane County, WI. Also, in that table, there is a measure of activityCount (how often the site was sampled), and resultCount (how many individual results are available).

+
+
+

3.2.4 Samples: whatWQPsamples

+

The function whatWQPsamples returns information on the individual samples collected at a site. For example:

+
site <- whatWQPsamples(siteid = "USGS-01594440")
+

This returns one row for each instance that a sample was collect.

+
+
+

3.2.5 Metrics: whatWQPmetrics

+

The function whatWQPmetrics provides metric information. This is only currently available for STORET data:

+
type <- "Stream"
+sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type)
+
+
+
+
+

4 Embedded Metadata

+

All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a url (returning a character of the url used to obtain the data), siteInfo (returning a data frame with information on sites), and queryTime (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows:

+
attr(dischargeWI, "url")
+
+attr(dischargeWI, "queryTime")
+
+siteInfo <- attr(dischargeWI, "siteInfo")
+

Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes:

+
names(attributes(dischargeWI))
+

For data obtained from readNWISuv, readNWISdv, readNWISgwl there are two attributes that are particularly useful: siteInfo and variableInfo.

+
siteInfo <- attr(dischargeWI, "siteInfo")
+
+variableInfo <- attr(dischargeWI, "variableInfo")
+

Data obtained from readNWISpeak, readNWISmeas, and readNWISrating, the comment attribute is useful.

+
comment(peakData)
+
+# Which is equivalent to:
+attr(peakData, "comment")
+
+
+

5 Getting Started in R

+

This section describes the options for downloading and installing the dataRetrieval package.

+
+

5.1 New to R?

+

If you are new to R, you will need to first install the latest version of R, which can be found [here] (www.R-project.org).

+

At any time, you can get information about any function in R by typing a question mark before the functions name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. This will open a help file similar to the image below. To see the raw code for a particular code, type the name of the function, without parentheses.

+
?readNWISpCode
+
+A simple R help file +

A simple R help file

+
+

Additionally, many R packages have vignette files attached (such as this paper). To view the vignette:

+
vignette(dataRetrieval)
+
+
+

5.2 R User: Installing dataRetrieval

+

The following command installs dataRetrieval and subsequent required packages:

+
install.packages("dataRetrieval")
+

After installing the package, you need to open the library each time you re-start R. This is done with the simple command:

+
library(dataRetrieval)
+
+
+
+

6 Creating Tables in Microsoft® Software from R

+

There are a few steps that are required in order to create a table in Microsoft® software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData:

+
availableData <- whatNWISdata(siteNumber, "dv")
+dailyData <- availableData["00003" == availableData$stat_cd, 
+    ]
+
+tableData <- with(dailyData, data.frame(shortName = srsname, 
+    Start = begin_date, End = end_date, Count = count_nu, Units = parameter_units))
+

First, save the data frame as a tab delimited file (you don’t want to use comma delimited because there are commas in some of the data elements):

+
write.table(tableData, file = "tableData.tsv", sep = "\t", row.names = FALSE, 
+    quote = FALSE)
+

This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following:

+
shortName  Start  End   Count   Units
+Temperature, water  2010-10-01  2012-06-24  575 deg C
+Stream flow, mean. daily    1948-01-01  2013-03-13  23814   ft3/s
+Specific conductance    2010-10-01  2012-06-24  551 uS/cm 25C
+Suspended sediment concentration (SSC)  1980-10-01  1991-09-30  3651    mg/l
+Suspended sediment discharge    1980-10-01  1991-09-30  3652    tons/day
+

Next, follow the steps below to open this file in Excel:

+
    +
  1. Open Excel
  2. +
  3. Click on the File tab
  4. +
  5. Click on the Open option
  6. +
  7. Navigate to the working directory (as shown in the results of getwd())
  8. +
  9. Next to the File name text box, change the drop down type to All Files (.)
  10. +
  11. Double click tableData.tsv
  12. +
  13. A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next.
  14. +
  15. In the second window, click on the Tab delimiter if it is not automatically checked, then click Finished.
  16. +
  17. Use the many formatting tools within Excel to customize the table
  18. +
+

From Excel, it is simple to copy and paste the tables in other Microsoft® software. An example using one of the default Excel table formats is here. Additional formatting could be required in Excel, for example converting u to \(\mu\).

+
+A simple table produced in Microsoft ® Excel. +

A simple table produced in Microsoft ® Excel.

+
+
+
+

7 Disclaimer

+

This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information.

+
+ + + + + + + + diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf deleted file mode 100644 index 32dac5d7..00000000 Binary files a/inst/doc/dataRetrieval.pdf and /dev/null differ diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index c0c2e4ba..964bd283 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -312,6 +312,7 @@ test_that("NGWMN functions working", { context("getWebServiceData") test_that("long urls use POST", { + testthat::skip_on_cran() url <- paste0(rep("reallylongurl", 200), collapse = '') with_mock( RETRY = function(method, ...) { @@ -324,7 +325,9 @@ test_that("long urls use POST", { .env = "httr" ) }) + test_that("ngwmn urls don't use post", { + testthat::skip_on_cran() url <- paste0(rep("urlwithngwmn", 200), collapse = '') with_mock( RETRY = function(method, ...) { diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 0422d1b7..9871165e 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -202,6 +202,8 @@ test_that("readNWISuse tests", { context("state tests") test_that("state county tests",{ + testthat::skip_on_cran() + fullName <- stateCdLookup("wi", "fullName") expect_equal(fullName, "Wisconsin") @@ -235,6 +237,7 @@ df_test <- data.frame(site_no = as.character(1:13), result_va = 1:13, stringsAsFactors = FALSE) test_that("addWaterYear works with Date, POSIXct, character, but breaks with numeric", { + testthat::skip_on_cran() library(dplyr) df_date <- df_test @@ -254,7 +257,7 @@ test_that("addWaterYear works with Date, POSIXct, character, but breaks with num }) test_that("addWaterYear works for each column name", { - + testthat::skip_on_cran() nwisqw_style <- df_test nwisqw_style_wy <- addWaterYear(nwisqw_style) expect_equal(ncol(nwisqw_style_wy), ncol(nwisqw_style) + 1) @@ -277,6 +280,7 @@ test_that("addWaterYear works for each column name", { }) test_that("addWaterYear correctly calculates the WY and is numeric", { + testthat::skip_on_cran() df_test_wy <- addWaterYear(df_test) expect_is(df_test_wy[['waterYear']], "numeric") expect_true(all(df_test_wy[['waterYear']][1:9] == 2010)) @@ -284,18 +288,21 @@ test_that("addWaterYear correctly calculates the WY and is numeric", { }) test_that("addWaterYear adds column next to dateTime", { + testthat::skip_on_cran() df_test_wy <- addWaterYear(df_test) dateTime_col <- which(names(df_test_wy) == "dateTime") expect_equal(names(df_test_wy)[dateTime_col + 1], "waterYear") }) test_that("addWaterYear can be used with pipes", { + testthat::skip_on_cran() library(dplyr) df_test_wy <- df_test %>% addWaterYear() expect_equal(ncol(df_test_wy), ncol(df_test) + 1) }) test_that("addWaterYear doesn't add another WY column if it exists", { + testthat::skip_on_cran() df_test_wy <- addWaterYear(df_test) expect_equal(ncol(df_test_wy), ncol(df_test) + 1) df_test_wy2 <- addWaterYear(df_test_wy) @@ -303,6 +310,7 @@ test_that("addWaterYear doesn't add another WY column if it exists", { }) test_that("calcWaterYear can handle missing values", { + testthat::skip_on_cran() dateVec <- seq(as.Date("2010-01-01"),as.Date("2011-01-31"), by="months") dateVec[c(3,7,12)] <- NA wyVec <- dataRetrieval:::calcWaterYear(dateVec) @@ -314,6 +322,7 @@ test_that("calcWaterYear can handle missing values", { context("Construct NWIS urls") test_that("Construct NWIS urls", { + testthat::skip_on_cran() siteNumber <- '01594440' startDate <- '1985-01-01' @@ -368,7 +377,17 @@ test_that("Construct NWIS urls", { context("Construct WQP urls") test_that("Construct WQP urls", { + testthat::skip_on_cran() + site_id <- '01594440' + startDate <- '1985-01-01' + endDate <- '' + pCode <- c("00060","00010") + url_wqp <- constructWQPURL(paste("USGS",site_id,sep="-"), + c('01075','00029','00453'), + startDate,endDate) + + expect_equal(url_wqp, "https://www.waterqualitydata.us/Result/search?siteid=USGS-01594440&pCode=01075;00029;00453&startDateLo=01-01-1985&sorted=no&mimeType=tsv") }) context("checkWQPdates") diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd new file mode 100644 index 00000000..1085d715 --- /dev/null +++ b/vignettes/dataRetrieval.Rmd @@ -0,0 +1,762 @@ +--- +title: "Introduction to the dataRetrieval package" +date: "`r format(Sys.time(), '%d %B, %Y')`" +author: Laura A. DeCicco and Robert M. Hirsch +output: + rmarkdown::html_vignette: + toc: true + number_sections: true +vignette: > + %\VignetteEngine{knitr::rmarkdown} + %\VignetteIndexEntry{Introduction to the dataRetrieval package} + \usepackage[utf8]{inputenc} +--- + + +```{r setup, include=FALSE, message=FALSE} +library(knitr) +library(dataRetrieval) + +options(continue=" ") +options(width=60) +knitr::opts_chunk$set(echo = TRUE, + warning = FALSE, + message = FALSE, + fig.height = 7, + fig.width = 7) + +opts_chunk$set(highlight=TRUE, + tidy=TRUE, + keep.space=TRUE, + keep.blank.space=FALSE, + keep.comment=TRUE) +knit_hooks$set(inline = function(x) { + if (is.numeric(x)) round(x, 3)}) +``` + +The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). + +For information on getting started in R and installing the package, see [Getting Started](#getting-started-in-r). Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. + +A quick workflow for USGS `dataRetrieval` functions: + +```{r workflow, echo=TRUE,eval=FALSE} +library(dataRetrieval) +# Choptank River near Greensboro, MD +siteNumber <- "01491000" +ChoptankInfo <- readNWISsite(siteNumber) +parameterCd <- "00060" + +#Raw daily data: +rawDailyData <- readNWISdv(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +# Sample data Nitrate: +parameterCd <- "00618" +qwData <- readNWISqw(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +pCode <- readNWISpCode(parameterCd) + +``` + +USGS data are made available through the National Water Information System (NWIS). + +Table 1 describes the functions available in the `dataRetrieval` package. + +```{r echo=FALSE} + +library(htmlTable) + +Functions <- c("readNWISdata","", + "readNWISdv","","","","", + "readNWISqw","","","","", + "readNWISuv","","","", + "readNWISrating","", + "readNWISmeas","","", + "readNWISpeak","","", + "readNWISgwl","","", + "readNWISuse","","","", + "readNWISstat","","","","","", + "readNWISpCode", + "readNWISsite", + "whatNWISsites", + "whatNWISdata","", + "readWQPdata", + "readWQPqw","","","", + "whatWQPsites") +Arguments <- c("...","service", #readNWISdata + "siteNumber","parameterCd","startDate","endDate","statCd", #readNWISdv + "siteNumber","parameterCd","startDate","endDate","expanded", #readNWISqw + "siteNumber","parameterCd","startDate","endDate", #readNWISuv + "siteNumber","type", #readNWISrating + "siteNumber","startDate","endDate", #readNWISmeas + "siteNumber","startDate","endDate", #readNWISpeak + "siteNumber","startDate","endDate", #readNWISgwl + "stateCd","countyCd","years","categories", #readNWISuse + "siteNumbers","parameterCd","startDate","endDate","statReportType","statType", #readNWISstat + "parameterCd", #readNWISpCode + "siteNumber", #readNWISsite + "...", #whatNWISsites + "siteNumber","service", #whatNWISdata + "...", #readWQPdata + "siteNumber","parameterCd","startDate","endDate", #readWQPqw + "...") #whatWQPsites +Description <- c("NWIS data using user-specified queries","", #readNWISdata + "NWIS daily data","","","","", #readNWISdv + "NWIS water quality data","","","","", #readNWISqw + "NWIS instantaneous value data","","","", #readNWISuv + "NWIS rating table for active streamgage","", #readNWISrating + "NWIS surface-water measurements","","", #readNWISmeas + "NWIS peak flow data","","", #readNWISpeak + "NWIS groundwater level measurements","","", #readNWISgwl + "NWIS water use","","","", #readNWISuse + "NWIS statistical service","","","","","", #readNWISstat + "NWIS parameter code information", #readNWISpCode + "NWIS site information", #readNWISsite + "NWIS site search using user-specified queries", + "NWIS data availability, including period of record and count","", + "WQP data using user-specified queries", + "WQP data","","","", + "WQP site search using user-specified queries") + +data.df <- data.frame(`Function Name` = Functions, Arguments, Description, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 1: dataRetrieval functions", + rnames=FALSE, align=c("l","l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + + +# USGS Web Retrievals + +In this section, examples of National Water Information System (NWIS) retrievals show how to get raw data into R. This data includes [site information](#site-information), measured [parameter information](#parameter-information), historical [daily values](#daily-data), [unit values](#unit-data) (which include real-time data but can also include other sensor data stored at regular time intervals), [water quality data](#water-quality-data), [groundwater level data](#groundwater-level-data), [peak flow data](#peak-flow-data), [rating curve data](#rating-curve-data), [surface-water measurement data](#surface-water-measurement-data), [water use data](#water-use-data), and [statistics data](#statistics-data). The section [Embedded Metadata](#embedded-metadata) shows instructions for getting metadata that is attached to each returned data frame. + +The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the `dataRetrieval` package as `siteNumber`). Often (but not always), these ID's are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this `siteNumber`. There are many ways to do this, one is the [National Water Information System: Mapper](https://maps.waterdata.usgs.gov/mapper/index.html). + +Once the `siteNumber` is known, the next required input for USGS data retrievals is the "parameter code". This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents "Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen", with units of "mg/l as N". + +Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table 2. + +```{r tableParameterCodes, echo=FALSE} + + +pCode <- c('00060', '00065', '00010','00045','00400') +shortName <- c("Discharge [ft3/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") + +data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 2: Common USGS Parameter Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + +``` + +Two output columns that may not be obvious are "srsname" and "casrn". Srsname stands for "Substance Registry Services". More information on the srs name can be found [here](http://ofmpub.epa.gov/sor_internet/registry/substreg/home/overview/home.do). + +Casrn stands for "Chemical Abstracts Service (CAS) Registry Number". More information on CAS can be found [here](http://www.cas.org/content/chemical-substances/faqs). + +For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and `siteNumber` is enough to make a request for data. For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code. + +Some common codes are shown in Table 3. + +```{r tableStatCodes, echo=FALSE} +StatCode <- c('00001', '00002', '00003','00008') +shortName <- c("Maximum","Minimum","Mean", "Median") + +data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) + +htmlTable(data.df, + caption="Table 3: Commonly used USGS Stat Codes", + rnames=FALSE, align=c("c","c"), col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + + + +``` + +Examples for using these site numbers, parameter codes, and statistic codes will be presented in subsequent sections. + +There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as "Ice". Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns), unless the user sets an argument `convertType` to `FALSE`. In that case, the data is returned as a data frame that is entirely character columns. + +## Site Information + + +### readNWISsite + +Use the `readNWISsite` function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. `readNWISsite` can also access information about multiple sites with a vector input. + + +```{r getSite, echo=TRUE, eval=FALSE} +siteNumbers <- c("01491000","01645000") +siteINFO <- readNWISsite(siteNumbers) +``` + +Site information is obtained from: +[https://waterservices.usgs.gov/rest/Site-Test-Tool.html](https://waterservices.usgs.gov/rest/Site-Test-Tool.html) + +Information on the returned data can be found with the `comment` function as described in the [Metadata](#embedded-metadata) section. + +```{r siteNames3, echo=TRUE, eval=FALSE} +comment(siteINFO) +``` + + +### whatNWISdata + +To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the `whatNWISdata` function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: "dv" (daily values), "uv", or "iv" (unit values), "qw" (water-quality), "sv" (sites visits), "pk" (peak measurements), "gw" (groundwater levels), "ad" (sites included in USGS Annual Water Data Reports External Link), "aw" (sites monitored by the USGS Active Groundwater Level Network External Link), and "id" (historical instantaneous values). + +In the following example, we limit the retrieved data to only daily data. The default for "service" is `all`, which returns all of the available data for that site. Likewise, there are arguments for parameter code (`parameterCd`) and statistic code (`statCd`) to filter the results. The default for both is to return all possible values (`all`). The returned `count_nu` for "uv" data is the count of days with returned data, not the actual count of returned values. + + +```{r getSiteExtended, echo=TRUE, eval=FALSE} +# Continuing from the previous example: +# This pulls out just the daily, mean data: + +dailyDataAvailable <- whatNWISdata(siteNumbers, + service="dv", statCd="00003") + + +``` + +```{r echo=FALSE} + +tableData <- data.frame( + siteNumber = c("01491000","01491000","01645000","01491000","01491000","01491000"), + srsname = c("Temperature, water","Stream flow, mean daily", + "Stream flow, mean daily", + "Specific conductance", + "Suspended sediment concentration (SSC)", + "Suspended sediment discharge" ), + startDate = c("2010-10-01","1948-01-01","1930-09-26","2010-10-01","1980-10-01","1980-10-01"), + endDate = c("2012-05-09","2017-05-17","2017-05-17","2012-05-09","1991-09-30","1991-09-30"), + count = c("529","25340","31646","527","4017","4017"), + units = c("deg C","ft3/s","ft3/s","uS/cm @25C","mg/l","tons/day"), + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]", + rnames=FALSE, + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + +See [Creating Tables](#creating-tables-in-microsoft-software-from-r) for instructions on converting an R data frame to a table in Microsoft® software Excel or Word to display a data availability table similar to Table 4. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries. + +## Parameter Information + +To obtain all of the available information concerning a measured parameter (or multiple parameters), use the `readNWISpCode` function: + +```{r label=getPCodeInfo, echo=TRUE, eval=FALSE} +# Using defaults: +parameterCd <- "00618" +parameterINFO <- readNWISpCode(parameterCd) +``` + + +## Daily Data + +To obtain daily records of USGS data, use the `readNWISdv` function. The arguments for this function are `siteNumber`, `parameterCd`, `startDate`, `endDate`, and `statCd` (defaults to "00003"). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from [https://waterservices.usgs.gov/rest/DV-Test-Tool.html](https://waterservices.usgs.gov/rest/DV-Test-Tool.html). + +The dates (start and end) must be in the format "YYYY-MM-DD" (note: the user must include the quotes). Setting the start date to "" (no space) will prompt the program to ask for the earliest date, and setting the end date to "" (no space) will prompt for the latest available date. + +```{r label=getNWISDaily, echo=TRUE, eval=FALSE} + +# Choptank River near Greensboro, MD: +siteNumber <- "01491000" +parameterCd <- "00060" # Discharge +startDate <- "2009-10-01" +endDate <- "2012-09-30" + +discharge <- readNWISdv(siteNumber, + parameterCd, startDate, endDate) +``` + +The column "datetime" in the returned data frame is automatically imported as a variable of class "Date" in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often "A" (approved for publication) or "P" (provisional data subject to revision). + +Another example would be a request for mean and maximum daily temperature and discharge in early 2012: + +```{r label=getNWIStemperature, echo=TRUE, eval=FALSE} +siteNumber <- "01491000" +parameterCd <- c("00010","00060") # Temperature and discharge +statCd <- c("00001","00003") # Mean and maximum +startDate <- "2012-01-01" +endDate <- "2012-05-01" + +temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, + startDate, endDate, statCd=statCd) + +``` + +```{r label=getNWIStemperature2, echo=FALSE, eval=TRUE} +filePath <- system.file("extdata", package="dataRetrieval") +fileName <- "temperatureAndFlow.RData" +fullPath <- file.path(filePath, fileName) +load(fullPath) + +``` + +The column names can be shortened and simplified using the `renameNWISColumns` function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discussed further in the [metadata](#embedded-metadata) section. + + +```{r label=renameColumns, echo=TRUE} +names(temperatureAndFlow) + +temperatureAndFlow <- renameNWISColumns(temperatureAndFlow) +names(temperatureAndFlow) + +``` + +```{r label=attr1, echo=TRUE} +#Information about the data frame attributes: +names(attributes(temperatureAndFlow)) + +statInfo <- attr(temperatureAndFlow, "statisticInfo") +variableInfo <- attr(temperatureAndFlow, "variableInfo") +siteInfo <- attr(temperatureAndFlow, "siteInfo") + +``` + + + +An example of plotting the above data: + +```{r} +variableInfo <- attr(temperatureAndFlow, "variableInfo") +siteInfo <- attr(temperatureAndFlow, "siteInfo") + +par(mar=c(5,5,5,5)) #sets the size of the plot window + +plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max, + ylab=variableInfo$parameter_desc[1],xlab="" ) +par(new=TRUE) +plot(temperatureAndFlow$Date, temperatureAndFlow$Flow, + col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE + ) +axis(4,col="red",col.axis="red") +mtext(variableInfo$parameter_desc[2],side=4,line=3,col="red") +title(paste(siteInfo$station_nm,"2012")) +legend("topleft", variableInfo$param_units, + col=c("black","red"),lty=c(NA,1),pch=c(1,NA)) +``` + + +## Unit Data + +Any data collected at regular time intervals (such as 15-minute or hourly) are known as "unit values". Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function `readNWISuv`. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. + +```{r label=readNWISuv, eval=FALSE} + +parameterCd <- "00060" # Discharge +startDate <- "2012-05-12" +endDate <- "2012-05-13" +dischargeUnit <- readNWISuv(siteNumber, parameterCd, + startDate, endDate) +dischargeUnit <- renameNWISColumns(dischargeUnit) +``` + +The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the `startDate` through the `endDate` (starting and ending with midnight locally-collected time). The dateTime column is converted to UTC (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm). + +To override the UTC timezone, specify a valid timezone in the tz argument. Default is "", which will keep the dateTime column in UTC. Other valid timezones are: + +``` +America/New_York +America/Chicago +America/Denver +America/Los_Angeles +America/Anchorage +America/Honolulu +America/Jamaica +America/Managua +America/Phoenix +America/Metlakatla +``` + +Data are retrieved from [https://waterservices.usgs.gov/rest/IV-Test-Tool.html](https://waterservices.usgs.gov/rest/IV-Test-Tool.html). There are occasions where NWIS values are not reported as numbers, instead a common example is "Ice". Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in [metadata](#embedded-metadata) section. + + +## Water Quality Data + +To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function `readNWISqw`, with the input arguments: `siteNumber`, `parameterCd`, `startDate`, and `endDate`. Additionally, the argument `expanded` is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (`expanded=FALSE`), or a more complete and verbose output (`expanded=TRUE`). `expanded = TRUE` includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There also includes an argument "reshape", that converts the expanded dataset to a "wide" format (each requested parameter code gets individual columns). The defaults are `expanded=TRUE`, and `reshape=FALSE`. + +```{r label=getQW, echo=TRUE, eval=FALSE} + +# Dissolved Nitrate parameter codes: +parameterCd <- c("00618","71851") +startDate <- "1985-10-01" +endDate <- "2012-09-30" + +dfLong <- readNWISqw(siteNumber, parameterCd, + startDate, endDate) + +# Or the wide return: +dfWide <- readNWISqw(siteNumber, parameterCd, + startDate, endDate, reshape=TRUE) + +``` + +Site information and measured parameter information is attached to the data frame as attributes. This is discussed further in the [metadata](#embedded-metadata) section. Additional metadata, such as information about the column names can be found by using the `comment` function, also described in the [metadata](#embedded-metadata) section. + +```{r qwmeta, echo=TRUE, eval=FALSE} + +comment(dfLong) + +``` + + +## Groundwater Level Data + +Groundwater level measurements can be obtained with the `readNWISgwl` function. Information on the returned data can be found with the `comment` function, and attached attributes as described in the [metadata](#embedded-metadata) section. + +```{r gwlexample, echo=TRUE, eval=FALSE} +siteNumber <- "434400121275801" +groundWater <- readNWISgwl(siteNumber) +``` + + +## Peak Flow Data + +Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the `readNWISpeak` function. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +```{r peakexample, echo=TRUE, eval=FALSE} +siteNumber <- '01594440' +peakData <- readNWISpeak(siteNumber) + +``` + + +## Rating Curve Data + +Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +Rating curves can be obtained with the `readNWISrating` function. + +```{r ratingexample, echo=TRUE, eval=FALSE} +ratingData <- readNWISrating(siteNumber, "base") +attr(ratingData, "RATING") + +``` + + +## Surface-Water Measurement Data + +These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. + +Surface-water measurement data can be obtained with the `readNWISmeas` function. + +```{r surfexample, echo=TRUE, eval=FALSE} +surfaceData <- readNWISmeas(siteNumber) + +``` + + +## Water Use Data +Retrieves water use data from USGS Water Use Data for the Nation. See [https://waterdata.usgs.gov/nwis/wu](https://waterdata.usgs.gov/nwis/wu) for more information. All available use categories for the supplied arguments are retrieved. + +```{r eval=FALSE} +allegheny <- readNWISuse(stateCd = "Pennsylvania", + countyCd = "Allegheny") + + +national <- readNWISuse(stateCd = NULL, + countyCd = NULL, + transform = TRUE) + +``` + +## Statistics Data +Retrieves site statistics from the USGS Statistics Web Service beta. + +```{r eval=FALSE} +discharge_stats <- readNWISstat(siteNumbers=c("02319394"), + parameterCd=c("00060"), + statReportType="annual") + +``` + + +# Water Quality Portal Web Retrievals + +There are additional water quality data sets available from the [Water Quality Data Portal](https://www.waterqualitydata.us/). These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included in the future. Because only USGS uses parameter codes, a "characteristic name" must be supplied. The `readWQPqw` function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. + + +```{r label=getQWData, echo=TRUE, eval=FALSE} +specificCond <- readWQPqw('WIDNR_WQX-10032762', + 'Specific conductance', + '2011-05-01','2011-09-30') +``` + +A tool for finding NWIS characteristic names can be found [here](https://www.waterqualitydata.us/public_srsnames/) + + +# Generalized Retrievals + +The previous examples all took specific input arguments: `siteNumber`, `parameterCd` (or characteristic name), `startDate`, `endDate`, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. + +## NWIS + +### Sites: whatNWISsites + +The function `whatNWISsites` can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the `...` argument, which allows the user to use any arbitrary input argument. We can then use the service [here](https://waterservices.usgs.gov/rest/Site-Test-Tool.html) to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is: + +[https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv](https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv) + +The following `dataRetrieval` code can be used to get those sites: + +```{r siteSearch, eval=FALSE} +sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), + parameterCd=c("00010","00060"), + hasDataTypeCd="dv") +``` + +### Data: readNWISdata + +For NWIS data, the function `readNWISdata` can be used. The argument listed in the R help file is `...` and `service` (only for data requests). Table 5 describes the services are available. + +```{r echo=FALSE} + +Service <- c("dv","iv","gwlevels","qwdata","measurements","peak","stat") +Description <- c("Daily","Instantaneous","Groundwater Levels","Water Quality","Surface Water Measurements","Peak Flow","Statistics Service") +URL <- c("https://waterservices.usgs.gov/rest/DV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/IV-Test-Tool.html", + "https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html", + "https://nwis.waterdata.usgs.gov/nwis/qwdata", + "https://waterdata.usgs.gov/nwis/measurements/", + "https://nwis.waterdata.usgs.gov/usa/nwis/peak/", + "https://waterservices.usgs.gov/rest/Statistics-Service-Test-Tool.html") + +tableData <- data.frame(Service, + Description, + URL, + stringsAsFactors = FALSE) + + +htmlTable(tableData, + caption="Table 5: NWIS general data calls", + rnames=FALSE, align=c("l","l","l"), + col.rgroup = c("none", "#F7F7F7"), + css.cell="padding-bottom: 0.0em; padding-right: 0.5em; padding-top: 0.0em;") + +``` + + +The `...` argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi2, and the most recent daily discharge data. That request would be done as follows: + +```{r dataExample, eval=FALSE} +dischargeWI <- readNWISdata(service="dv", + stateCd="WI", + parameterCd="00060", + drainAreaMin="50", + statCd="00003") + +siteInfo <- attr(dischargeWI, "siteInfo") + +``` + + +## WQP + +Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found [here](https://www.waterqualitydata.us/webservices_documentation). + +### Sites: whatWQPsites + +To discover available sites in the WQP in New Jersey that have measured Chloride, use the function `whatWQPsites`. + +```{r NJChloride, eval=FALSE} + +sitesNJ <- whatWQPsites(statecode="US:34", + characteristicName="Chloride") + +``` + + +### Data: readWQPdata + +To get data from the WQP using generalized Web service calls, use the function `readWQPdata`. For example, to get all the pH data in Wisconsin: + +```{r phData, eval=FALSE} +dataPH <- readWQPdata(statecode="US:55", + characteristicName="pH") +``` + + +### Availability: whatWQPdata + +The function `whatWQPdata` returns a data frame with information on the amount of data collected at a site. For example: + +```{r eval=FALSE} +type <- "Stream" +sites <- whatWQPdata(countycode="US:55:025",siteType=type) +``` + +This returns a data frame with all of the sites that were measured in streams in Dane County, WI. Also, in that table, there is a measure of `activityCount` (how often the site was sampled), and `resultCount` (how many individual results are available). + +### Samples: whatWQPsamples + +The function `whatWQPsamples` returns information on the individual samples collected at a site. For example: + +```{r eval=FALSE} +site <- whatWQPsamples(siteid="USGS-01594440") +``` + +This returns one row for each instance that a sample was collect. + +### Metrics: whatWQPmetrics + +The function `whatWQPmetrics` provides metric information. This is only currently available for STORET data: + +```{r eval=FALSE} +type <- "Stream" +sites <- whatWQPmetrics(countycode="US:55:025",siteType=type) +``` + + +# Embedded Metadata + +All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a `url` (returning a character of the url used to obtain the data), `siteInfo` (returning a data frame with information on sites), and `queryTime` (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows: + +```{r meta1, eval=FALSE} + +attr(dischargeWI, "url") + +attr(dischargeWI, "queryTime") + +siteInfo <- attr(dischargeWI, "siteInfo") + +``` + +Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes: + +```{r meta2, eval=FALSE} + +names(attributes(dischargeWI)) + +``` + +For data obtained from `readNWISuv`, `readNWISdv`, `readNWISgwl` there are two attributes that are particularly useful: `siteInfo` and `variableInfo`. + +```{r meta3, eval=FALSE} + +siteInfo <- attr(dischargeWI, "siteInfo") + +variableInfo <- attr(dischargeWI, "variableInfo") + + +``` + +Data obtained from `readNWISpeak`, `readNWISmeas`, and `readNWISrating`, the `comment` attribute is useful. + +```{r meta5, eval=FALSE} +comment(peakData) + +#Which is equivalent to: +attr(peakData, "comment") +``` + + +# Getting Started in R + +This section describes the options for downloading and installing the `dataRetrieval` package. + +## New to R? + +If you are new to R, you will need to first install the latest version of R, which can be found [here] (www.R-project.org). + +At any time, you can get information about any function in R by typing a question mark before the functions name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. This will open a help file similar to the image below. To see the raw code for a particular code, type the name of the function, without parentheses. + + +``` +?readNWISpCode +``` + + +![A simple R help file](Rhelp.png) + + +Additionally, many R packages have vignette files attached (such as this paper). To view the vignette: +```{r seeVignette,eval = FALSE} +vignette(dataRetrieval) +``` + + +## R User: Installing dataRetrieval + +The following command installs `dataRetrieval` and subsequent required packages: + +```{r installFromCran,eval = FALSE} +install.packages("dataRetrieval") +``` + +After installing the package, you need to open the library each time you re-start R. This is done with the simple command: + +```{r openLibraryTest, eval=FALSE} +library(dataRetrieval) +``` + + + +# Creating Tables in Microsoft® Software from R + +There are a few steps that are required in order to create a table in Microsoft® software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData: + +```{r label=getSiteApp, echo=TRUE, eval=FALSE} +availableData <- whatNWISdata(siteNumber, "dv") +dailyData <- availableData["00003" == availableData$stat_cd,] + +tableData <- with(dailyData, + data.frame( + shortName=srsname, + Start=begin_date, + End=end_date, + Count=count_nu, + Units=parameter_units) + ) +``` + +First, save the data frame as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements): + + +```{r label=saveData, echo=TRUE, eval=FALSE} +write.table(tableData, file="tableData.tsv",sep="\t", + row.names = FALSE,quote=FALSE) +``` + +This will save a file in your working directory called tableData.tsv. You can see your working directory by typing `getwd()` in the R console. Opening the file in a general-purpose text editor, you should see the following: + +``` +shortName Start End Count Units +Temperature, water 2010-10-01 2012-06-24 575 deg C +Stream flow, mean. daily 1948-01-01 2013-03-13 23814 ft3/s +Specific conductance 2010-10-01 2012-06-24 551 uS/cm 25C +Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +``` + +Next, follow the steps below to open this file in Excel: + +1. Open Excel +2. Click on the File tab +3. Click on the Open option +4. Navigate to the working directory (as shown in the results of `getwd()`) +5. Next to the File name text box, change the drop down type to All Files (*.*) +6. Double click tableData.tsv +7. A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. +8. In the second window, click on the Tab delimiter if it is not automatically checked, then click Finished. +9. Use the many formatting tools within Excel to customize the table + +From Excel, it is simple to copy and paste the tables in other Microsoft® software. An example using one of the default Excel table formats is here. Additional formatting could be required in Excel, for example converting u to $\mu$. + +![A simple table produced in Microsoft ® Excel.](table1.png) + + +# Disclaimer +This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information. + + diff --git a/vignettes/dataRetrieval.Rnw b/vignettes/dataRetrieval.Rnw deleted file mode 100644 index 16dfa930..00000000 --- a/vignettes/dataRetrieval.Rnw +++ /dev/null @@ -1,960 +0,0 @@ -%\VignetteIndexEntry{Introduction to the dataRetrieval package} -%\VignetteEngine{knitr::knitr} -%\VignetteDepends{} -%\VignetteSuggests{xtable, testthat} -%\VignetteImports{XML, httr, reshape2,lubridate,utils,stats} -%\VignettePackage{dataRetrieval} - -\documentclass[a4paper,11pt]{article} - -\usepackage{amsmath} -\usepackage{times} -\usepackage{hyperref} -\usepackage[numbers, round]{natbib} -\usepackage[american]{babel} -\usepackage{authblk} -\usepackage{subfig} -\usepackage{placeins} -\usepackage{footnote} -\usepackage{tabularx} -\usepackage{threeparttable} -\usepackage{parskip} - -\usepackage{csquotes} -\usepackage{setspace} - -% \doublespacing - -\renewcommand{\topfraction}{0.85} -\renewcommand{\textfraction}{0.1} -\usepackage{graphicx} - - -\usepackage{mathptmx}% Times Roman font -\usepackage[scaled=.90]{helvet}% Helvetica, served as a model for arial - -% \usepackage{indentfirst} -% \setlength\parindent{20pt} -\setlength{\parskip}{0pt} - -\usepackage{courier} - -\usepackage{titlesec} -\usepackage{titletoc} - -\titleformat{\section} - {\normalfont\sffamily\bfseries\LARGE} - {\thesection}{0.5em}{} -\titleformat{\subsection} - {\normalfont\sffamily\bfseries\Large} - {\thesubsection}{0.5em}{} -\titleformat{\subsubsection} - {\normalfont\sffamily\large} - {\thesubsubsection}{0.5em}{} - -\titlecontents{section} -[2em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{subsection} -[4.6em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{subsubsection} -[6.9em] % adjust left margin -{\sffamily} % font formatting -{\contentslabel{2.3em}} % section label and offset -{\hspace*{-2.3em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{table} -[0em] % adjust left margin -{\sffamily} % font formatting -{Table\hspace*{2em} \contentslabel {2em}} % section label and offset -{\hspace*{4em}} -{\titlerule*[0.25pc]{.}\contentspage} - -\titlecontents{figure} -[0em] % adjust left margin -{\sffamily} % font formatting -{Figure\hspace*{2em} \contentslabel {2em}} % section label and offset -{\hspace*{4em}} -{\titlerule*[0.25pc]{.}\contentspage} - -%Italisize and change font of urls: -\urlstyle{sf} -\renewcommand\UrlFont\itshape - -\usepackage{caption} -\captionsetup{ - font={sf}, - labelfont={bf,sf}, - labelsep=period, - justification=justified, - singlelinecheck=false -} - - - -\textwidth=6.2in -\textheight=8.5in -\parskip=.3cm -\oddsidemargin=.1in -\evensidemargin=.1in -\headheight=-.3in - - -%------------------------------------------------------------ -% newcommand -%------------------------------------------------------------ -\newcommand{\scscst}{\scriptscriptstyle} -\newcommand{\scst}{\scriptstyle} -\newcommand{\Robject}[1]{{\texttt{#1}}} -\newcommand{\Rfunction}[1]{{\texttt{#1}}} -\newcommand{\Rclass}[1]{\textit{#1}} -\newcommand{\Rpackage}[1]{\textit{#1}} -\newcommand{\Rexpression}[1]{\texttt{#1}} -\newcommand{\Rmethod}[1]{{\texttt{#1}}} -\newcommand{\Rfunarg}[1]{{\texttt{#1}}} - -\begin{document} - -<>= -library(xtable) -options(continue=" ") -options(width=60) -library(knitr) - -@ - -\renewenvironment{knitrout}{\begin{singlespace}}{\end{singlespace}} -\renewcommand*\listfigurename{Figures} - -\renewcommand*\listtablename{Tables} - - -%------------------------------------------------------------ -\title{The dataRetrieval R package} -%------------------------------------------------------------ -\author[1]{Laura A. De Cicco} -\author[1]{Robert M. Hirsch} -\affil[1]{United States Geological Survey} - - -<>= -opts_chunk$set(highlight=TRUE, tidy=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE,comment="") -knit_hooks$set(inline = function(x) { - if (is.numeric(x)) round(x, 3)}) -knit_hooks$set(crop = hook_pdfcrop) - -bold.colHeaders <- function(x) { - x <- gsub("\\^(\\d)","$\\^\\1$",x) - x <- gsub("\\%","\\\\%",x) - x <- gsub("\\_"," ",x) - returnX <- paste("\\multicolumn{1}{c}{\\textbf{\\textsf{", x, "}}}", sep = "") -} -addSpace <- function(x) ifelse(x != "1", "[5pt]","") -library(dataRetrieval) -@ - -\noindent{\huge\textsf{\textbf{The dataRetrieval R package}}} - -\noindent\textsf{By Laura A. De Cicco and Robert M. Hirsch} - -\noindent\textsf{\today} - -% \maketitle -% -% \newpage - -\tableofcontents -\listoffigures -\listoftables - -\newpage - -%------------------------------------------------------------ -\section{Introduction to dataRetrieval} -%------------------------------------------------------------ -The dataRetrieval package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). - -For information on getting started in R and installing the package, see (\ref{sec:appendix1}): Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. - -A quick workflow for USGS dataRetrieval functions: - -<>= -library(dataRetrieval) -# Choptank River near Greensboro, MD -siteNumber <- "01491000" -ChoptankInfo <- readNWISsite(siteNumber) -parameterCd <- "00060" - -#Raw daily data: -rawDailyData <- readNWISdv(siteNumber,parameterCd, - "1980-01-01","2010-01-01") - -# Sample data Nitrate: -parameterCd <- "00618" -qwData <- readNWISqw(siteNumber,parameterCd, - "1980-01-01","2010-01-01") - -pCode <- readNWISpCode(parameterCd) - -@ - -USGS data are made available through the National Water Information System (NWIS). - -Table \ref{tab:func} describes the functions available in the dataRetrieval package. - -\begin{table}[!ht] -\begin{minipage}{\linewidth} -{\footnotesize -\caption{dataRetrieval functions} -\label{tab:func} -\begin{tabular}{lll} - \hline -\multicolumn{1}{c}{\textbf{\textsf{Function Name}}} & -\multicolumn{1}{c}{\textbf{\textsf{Arguments}}} & -\multicolumn{1}{c}{\textbf{\textsf{Description}}} \\ [0pt] - \hline - \texttt{readNWISdata} & \texttt{...} & NWIS data using user-specified queries\\ - & service & \\ - [5pt]\texttt{readNWISdv} & siteNumber & NWIS daily data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - & statCd & \\ - [5pt]\texttt{readNWISqw} & siteNumber & NWIS water quality data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - & expanded & \\ - [5pt]\texttt{readNWISuv} & siteNumber & NWIS instantaneous value data\\ - & parameterCd & \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISrating} & siteNumber & NWIS rating table for active streamgage \\ - & type & \\ - [5pt]\texttt{readNWISmeas} & siteNumber & NWIS surface-water measurements \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISpeak} & siteNumber & NWIS peak flow data \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISgwl} & siteNumber & NWIS groundwater level measurements \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{readNWISpCode} & parameterCd & NWIS parameter code information\\ - [5pt]\texttt{readNWISsite} & siteNumber & NWIS site information \\ - [5pt]\texttt{whatNWISsites} & \texttt{...} & NWIS site search using user-specified queries \\ - [5pt]\texttt{whatNWISdata} & siteNumber & NWIS data availability, including period of record and count \\ - & service & \\ - [5pt]\texttt{readWQPdata} & \texttt{...} & WQP data using user-specified queries \\ - [5pt]\texttt{readWQPqw} & siteNumber & WQP data \\ - & parameterCd (or characteristic name) & \\ - & startDate & \\ - & endDate & \\ - [5pt]\texttt{whatWQPsites} & \texttt{...} & WQP site search using user-specified queries \\ - \hline -\end{tabular} -} -\end{minipage} -\end{table} - -\clearpage - -%------------------------------------------------------------ -\section{USGS Web Retrievals} -\label{sec:genRetrievals} -%------------------------------------------------------------ -In this section, examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values(\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), water quality data (\ref{sec:usgsWQP}), groundwater level data (\ref{sec:gwl}), peak flow data (\ref{sec:peak}), rating curve data (\ref{sec:rating}, and surface-water measurement data (\ref{sec:meas}). Section \ref{sec:metadata} shows instructions for getting metadata that is attached to each returned data frame. - -The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the dataRetrieval package as \enquote{siteNumber}). Often (but not always), these ID's are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this siteNumber. There are many ways to do this, one is the National Water Information System: Mapper \url{https://maps.waterdata.usgs.gov/mapper/index.html}. - -Once the siteNumber is known, the next required input for USGS data retrievals is the \enquote{parameter code}. This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents \enquote{Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen}, with units of \enquote{mg/l as N}. - -Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table \ref{tab:params}. - -<>= -pCode <- c('00060', '00065', '00010','00045','00400') -shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") - -data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) - -print(xtable(data.df, - label="tab:params", - caption="Common USGS Parameter Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.text.function = function(x) {x}, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -Two output columns that may not be obvious are \enquote{srsname} and \enquote{casrn}. Srsname stands for \enquote{Substance Registry Services}. More information on the srs name can be found here: - -\url{http://ofmpub.epa.gov/sor_internet/registry/substreg/home/overview/home.do} - -Casrn stands for \enquote{Chemical Abstracts Service (CAS) Registry Number}. More information on CAS can be found here: - -\url{http://www.cas.org/content/chemical-substances/faqs} - -For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and siteNumber is enough to make a request for data. For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code. - -Some common codes are shown in Table \ref{tab:stat}. - -<>= -StatCode <- c('00001', '00002', '00003','00008') -shortName <- c("Maximum","Minimum","Mean", "Median") - -data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) - -print(xtable(data.df,label="tab:stat", - caption="Commonly used USGS Stat Codes"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -Examples for using these siteNumbers, parameter codes, and stat codes will be presented in subsequent sections. - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Site Information} -\label{sec:usgsSite} -%------------------------------------------------------------ - -%------------------------------------------------------------ -\subsubsection{readNWISsite} -\label{sec:usgsSiteFileData} -%------------------------------------------------------------ -Use the \texttt{readNWISsite} function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. \texttt{readNWISsite} can also access information about multiple sites with a vector input. - - -<>= -siteNumbers <- c("01491000","01645000") -siteINFO <- readNWISsite(siteNumbers) -@ - -Site information is obtained from: -\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html} - -Information on the returned data can be found with the \texttt{comment} function as described in section \ref{sec:metadata}. - -<>= -comment(siteINFO) -@ - - - -\FloatBarrier - -%------------------------------------------------------------ -\subsubsection{whatNWISdata} -\label{sec:usgsDataAvailability} -%------------------------------------------------------------ -To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{whatNWISdata} function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"}, \texttt{"}rt\texttt{"}, or \texttt{"}iv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (water-quality), \texttt{"}sv\texttt{"} (sites visits), \texttt{"}pk\texttt{"} (peak measurements), \texttt{"}gw\texttt{"} (groundwater levels), \texttt{"}ad\texttt{"} (sites included in USGS Annual Water Data Reports External Link), \texttt{"}aw\texttt{"} (sites monitored by the USGS Active Groundwater Level Network External Link), and \texttt{"}id\texttt{"} (historical instantaneous values). - -In the following example, we limit the retrieved data to only daily data. The default for \texttt{"}service\texttt{"} is \enquote{all}, which returns all of the available data for that site. Likewise, there are arguments for parameter code (\texttt{parameterCd}) and statistic code (\texttt{statCd}) to filter the results. The default for both is to return all possible values (\enquote{all}). The returned \texttt{"}count\_nu\texttt{"} for \texttt{"}uv\texttt{"} data is the count of days with returned data, not the actual count of returned values. - - -<>= -# Continuing from the previous example: -# This pulls out just the daily, mean data: - -dailyDataAvailable <- whatNWISdata(siteNumbers, - service="dv", statCd="00003") - - -@ - -<>= -tableData <- with(dailyDataAvailable, - data.frame( - siteNumber= site_no, - srsname=srsname, - startDate=as.character(begin_date), - endDate=as.character(end_date), - count=as.character(count_nu), - units=parameter_units, -# statCd = stat_cd, - stringsAsFactors=FALSE) - ) - -tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s" -tableData$units[which(tableData$units == "uS/cm @25C")] <- "$\\mu$S/cm @25C" - - -print(xtable(tableData,label="tab:gda", - caption="Reformatted version of output from \\texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]"), - caption.placement="top", - size = "\\footnotesize", - latex.environment=NULL, - sanitize.text.function = function(x) {x}, - sanitize.colnames.function = bold.colHeaders, - sanitize.rownames.function = addSpace - ) - -@ - -\begin{table}[ht] -\caption{Reformatted version of output from \texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]} -\label{tab:gda} -{\footnotesize -\begin{tabular}{rllllll} - \hline - & \multicolumn{1}{c}{\textbf{\textsf{siteNumber}}} & \multicolumn{1}{c}{\textbf{\textsf{srsname}}} & \multicolumn{1}{c}{\textbf{\textsf{startDate}}} & \multicolumn{1}{c}{\textbf{\textsf{endDate}}} & \multicolumn{1}{c}{\textbf{\textsf{count}}} & \multicolumn{1}{c}{\textbf{\textsf{units}}} \\ - \hline - & 01491000 & Temperature, water & 2010-10-01 & 2012-05-09 & 529 & deg C \\ - [5pt] & 01645000 & Stream flow, mean. daily & 1930-09-26 & 2015-02-19 & 30828 & ft$^3$/s \\ - [5pt] & 01491000 & Stream flow, mean. daily & 1948-01-01 & 2015-02-19 & 24522 & ft$^3$/s \\ - [5pt] & 01491000 & Specific conductance & 2010-10-01 & 2012-05-09 & 527 & $\mu$S/cm @25C \\ - [5pt] & 01491000 & Suspended sediment concentration (SSC) & 1980-10-01 & 1991-09-30 & 3651 & mg/l \\ - [5pt] & 01491000 & Suspended sediment discharge & 1980-10-01 & 1991-09-30 & 3652 & tons/day \\ - \hline -\end{tabular} -} -\end{table} - -See Section \ref{app:createWordTable} for instructions on converting an R data frame to a table in Microsoft\textregistered\ software Excel or Word to display a data availability table similar to Table \ref{tab:gda}. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries. - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Parameter Information} -\label{sec:usgsParams} -%------------------------------------------------------------ -To obtain all of the available information concerning a measured parameter (or multiple parameters), use the \texttt{readNWISpCode} function: - -<>= -# Using defaults: -parameterCd <- "00618" -parameterINFO <- readNWISpCode(parameterCd) -@ - - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Daily Data} -\label{sec:usgsDaily} -%------------------------------------------------------------ -To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to \texttt{"}00003\texttt{"}). If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}. - -The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. - -<>= - -# Choptank River near Greensboro, MD: -siteNumber <- "01491000" -parameterCd <- "00060" # Discharge -startDate <- "2009-10-01" -endDate <- "2012-09-30" - -discharge <- readNWISdv(siteNumber, - parameterCd, startDate, endDate) -@ - -The column \texttt{"}datetime\texttt{"} in the returned data frame is automatically imported as a variable of class \texttt{"}Date\texttt{"} in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often \texttt{"}A\texttt{"} (approved for publication) or \texttt{"}P\texttt{"} (provisional data subject to revision). - -Another example would be a request for mean and maximum daily temperature and discharge in early 2012: - -<>= -siteNumber <- "01491000" -parameterCd <- c("00010","00060") # Temperature and discharge -statCd <- c("00001","00003") # Mean and maximum -startDate <- "2012-01-01" -endDate <- "2012-05-01" - -temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, - startDate, endDate, statCd=statCd) - -@ - -<>= -filePath <- system.file("extdata", package="dataRetrieval") -fileName <- "temperatureAndFlow.RData" -fullPath <- file.path(filePath, fileName) -load(fullPath) - -@ - -The column names can be shortened and simplified using the \texttt{renameNWISColumns} function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. - - -<>= -names(temperatureAndFlow) - -temperatureAndFlow <- renameNWISColumns(temperatureAndFlow) -names(temperatureAndFlow) - -@ - -<>= -#Information about the data frame attributes: -names(attributes(temperatureAndFlow)) - -statInfo <- attr(temperatureAndFlow, "statisticInfo") -variableInfo <- attr(temperatureAndFlow, "variableInfo") -siteInfo <- attr(temperatureAndFlow, "siteInfo") - -@ - - - -An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}): - -<>= -variableInfo <- attr(temperatureAndFlow, "variableInfo") -siteInfo <- attr(temperatureAndFlow, "siteInfo") - -par(mar=c(5,5,5,5)) #sets the size of the plot window - -plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max, - ylab=variableInfo$parameter_desc[1],xlab="" ) -par(new=TRUE) -plot(temperatureAndFlow$Date, temperatureAndFlow$Flow, - col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE - ) -axis(4,col="red",col.axis="red") -mtext(variableInfo$parameter_desc[2],side=4,line=3,col="red") -title(paste(siteInfo$station_nm,"2012")) -legend("topleft", variableInfo$param_units, - col=c("black","red"),lty=c(NA,1),pch=c(1,NA)) -@ - - -There are occasions where NWIS values are not reported as numbers, instead there might be text describing a certain event such as \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns). - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Unit Data} -\label{sec:usgsRT} -%------------------------------------------------------------ -Any data collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values.} Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{readNWISuv}. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. - -<>= - -parameterCd <- "00060" # Discharge -startDate <- "2012-05-12" -endDate <- "2012-05-13" -dischargeUnit <- readNWISuv(siteNumber, parameterCd, - startDate, endDate) -dischargeUnit <- renameNWISColumns(dischargeUnit) -@ - -The retrieval produces a data frame that contains 96 rows (one for every 15 minute period in the day). They include all data collected from the startDate through the endDate (starting and ending with midnight locally-collected time). The dateTime column is converted to \enquote{UTC} (Coordinated Universal Time), so midnight EST will be 5 hours earlier in the dateTime column (the previous day, at 7pm). - -To override the UTC timezone, specify a valid timezone in the tz argument. Default is \texttt{""}, which will keep the dateTime column in UTC. Other valid timezones are: - -\begin{verbatim} -America/New_York -America/Chicago -America/Denver -America/Los_Angeles -America/Anchorage -America/Honolulu -America/Jamaica -America/Managua -America/Phoenix -America/Metlakatla -\end{verbatim} - -Data are retrieved from \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. - -\newpage - - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Water Quality Data} -\label{sec:usgsWQP} -%------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, and endDate. Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There also includes an argument \texttt{"}reshape\texttt{"}, that converts the expanded dataset to a \texttt{"}wide\texttt{"} format (each requested parameter code gets individual columns). The defaults are expanded=TRUE, and reshape=FALSE. - -<>= - -# Dissolved Nitrate parameter codes: -parameterCd <- c("00618","71851") -startDate <- "1985-10-01" -endDate <- "2012-09-30" - -dfLong <- readNWISqw(siteNumber, parameterCd, - startDate, endDate) - -# Or the wide return: -# dfWide <- readNWISqw(siteNumber, parameterCd, -# startDate, endDate, reshape=TRUE) - -@ - -Site information and measured parameter information is attached to the data frame as attributes. This is discused further in section \ref{sec:metadata}. Additional metadata, such as information about the column names can be found by using the \texttt{comment} function, also described in section \ref{sec:metadata}. - -<>= - -comment(dfLong) - -@ - -\FloatBarrier - -%------------------------------------------------------------ -\subsection{Groundwater Level Data} -\label{sec:gwl} -%------------------------------------------------------------ -Groundwater level measurements can be obtained with the \texttt{readNWISgwl} function. Information on the returned data can be found with the \texttt{comment} function, and attached attributes as described in section \ref{sec:metadata}. - -<>= -siteNumber <- "434400121275801" -groundWater <- readNWISgwl(siteNumber) -@ - -%------------------------------------------------------------ -\subsection{Peak Flow Data} -\label{sec:peak} -%------------------------------------------------------------ - -Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the \texttt{readNWISpeak} function. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -<>= -siteNumber <- '01594440' -peakData <- readNWISpeak(siteNumber) - -@ - - -%------------------------------------------------------------ -\subsection{Rating Curve Data} -\label{sec:rating} -%------------------------------------------------------------ -Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -Rating curves can be obtained with the \texttt{readNWISrating} function. - -<>= -ratingData <- readNWISrating(siteNumber, "base") -attr(ratingData, "RATING") - -@ - - - -%------------------------------------------------------------ -\subsection{Surface-Water Measurement Data} -\label{sec:meas} -%------------------------------------------------------------ -These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}. - -Surface-water measurement data can be obtained with the \texttt{readNWISmeas} function. - -<>= -surfaceData <- readNWISmeas(siteNumber) - -@ - - -%------------------------------------------------------------ -\section{Water Quality Portal Web Retrievals} -\label{sec:usgsSTORET} -%------------------------------------------------------------ -There are additional water quality data sets available from the Water Quality Data Portal (\url{https://www.waterqualitydata.us/}). These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included in the future. Because only USGS uses parameter codes, a \texttt{"}characteristic name\texttt{"} must be supplied. The \texttt{readWQPqw} function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. - - -<>= -specificCond <- readWQPqw('WIDNR_WQX-10032762', - 'Specific conductance','2011-05-01','2011-09-30') -@ - -A tool for finding NWIS characteristic names can be found at: - -\url{https://www.waterqualitydata.us/public_srsnames/} - -\FloatBarrier - -%------------------------------------------------------------ -\section{Generalized Retrievals} -\label{sec:general} -%------------------------------------------------------------ -The previous examples all took specific input arguments: siteNumber, parameterCd (or characteristic name), startDate, endDate, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. - -%------------------------------------------------------------ -\subsubsection{NWIS Sites} -\label{sec:NWISGenSite} -%------------------------------------------------------------ -The function \texttt{whatNWISsites} can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the \texttt{"..."} argument, which allows the user to use any arbitrary input argument. We can then use the service here: - -\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html} - -to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is: - -\url{https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5¶meterCd=00010,00060&hasDataTypeCd=dv} - -The following dataRetrieval code can be used to get those sites: - -<>= -sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), - parameterCd=c("00010","00060"), - hasDataTypeCd="dv") -@ - - -%------------------------------------------------------------ -\subsubsection{NWIS Data} -\label{sec:NWISGenData} -%------------------------------------------------------------ -For NWIS data, the function \texttt{readNWISdata} can be used. The argument listed in the R help file is \texttt{"..."} and \texttt{"}service\texttt{"} (only for data requests). Table \ref{tab:NWISGeneral} describes the services are available. - -\begin{table}[!ht] -\begin{minipage}{\linewidth} -{\footnotesize -\caption{NWIS general data calls} -\label{tab:NWISGeneral} -\begin{tabular}{lll} - \hline -\multicolumn{1}{c}{\textbf{\textsf{Service}}} & -\multicolumn{1}{c}{\textbf{\textsf{Description}}} & -\multicolumn{1}{c}{\textbf{\textsf{Reference URL}}} \\ [0pt] - \hline - daily values & dv & \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}\\ - [5pt]instantaneous & iv & \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}\\ - [5pt]groundwater levels & gwlevels & \url{https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html}\\ - [5pt]water quality & qwdata & \url{https://nwis.waterdata.usgs.gov/nwis/qwdata}\\ - \hline -\end{tabular} -} -\end{minipage} -\end{table} - -The \texttt{"..."} argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area less than 50 mi$^2$, and the most recent daily dischage data. That request would be done as follows: - -<>= -dischargeWI <- readNWISdata(service="dv", - stateCd="WI", - parameterCd="00060", - drainAreaMin="50", - statCd="00003") - -siteInfo <- attr(dischargeWI, "siteInfo") - -@ - -%------------------------------------------------------------ -\subsubsection{WQP Sites} -\label{sec:WQPGenSite} -%------------------------------------------------------------ - -Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches is found here: - -\url{https://www.waterqualitydata.us/webservices_documentation} - -To discover available sites in the WQP in New Jersey that have measured Chloride, use the function \texttt{whatWQPsites}. - -<>= - -sitesNJ <- whatWQPsites(statecode="US:34", - characteristicName="Chloride") - -@ - - -%------------------------------------------------------------ -\subsubsection{WQP Data} -\label{sec:WQPGenData} -%------------------------------------------------------------ -Finally, to get data from the WQP using generalized Web service calls, use the function \texttt{readWQPdata}. For example, to get all the pH data in Wisconsin: - -<>= - -dataPH <- readWQPdata(statecode="US:55", - characteristicName="pH") - -@ - - - -\FloatBarrier - -\clearpage - -%------------------------------------------------------------ -\section{Embedded Metadata} -\label{sec:metadata} -%------------------------------------------------------------ -All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a \texttt{url} (returning a character of the url used to obtain the data), \texttt{siteInfo} (returning a data frame with information on sites), and \texttt{queryTime} (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows: - -<>= - -attr(dischargeWI, "url") - -attr(dischargeWI, "queryTime") - -siteInfo <- attr(dischargeWI, "siteInfo") - -@ - -Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes: - -<>= - -names(attributes(dischargeWI)) - -@ - -For data obtained from \texttt{readNWISuv}, \texttt{readNWISdv}, \texttt{readNWISgwl} there are two attributes that are particularly useful: \texttt{siteInfo} and \texttt{variableInfo}. - -<>= - -siteInfo <- attr(dischargeWI, "siteInfo") - -variableInfo <- attr(dischargeWI, "variableInfo") - - -@ - -Data obtained from \texttt{readNWISpeak}, \texttt{readNWISmeas}, and \texttt{readNWISrating}, the \texttt{comment} attribute is useful. - -<>= -comment(peakData) - -#Which is equivalent to: -# attr(peakData, "comment") -@ - - - -%------------------------------------------------------------ -\section{Getting Started in R} -\label{sec:appendix1} -%------------------------------------------------------------ -This section describes the options for downloading and installing the dataRetrieval package. - -%------------------------------------------------------------ -\subsection{New to R?} -%------------------------------------------------------------ -If you are new to R, you will need to first install the latest version of R, which can be found here: \url{http://www.r-project.org/}. - -At any time, you can get information about any function in R by typing a question mark before the functions name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. This will open a help file similar to Figure \ref{fig:help}. To see the raw code for a particular code, type the name of the function, without parentheses. - - -<>= -?readNWISpCode -@ - -\FloatBarrier - - -\begin{figure}[ht!] -\centering - \resizebox{0.95\textwidth}{!}{\includegraphics{Rhelp.png}} -\caption{A simple R help file} -\label{fig:help} -\end{figure} - -Additionally, many R packages have vignette files attached (such as this paper). To view the vignette: -<>= -vignette(dataRetrieval) -@ - -\FloatBarrier -\clearpage -%------------------------------------------------------------ -\subsection{R User: Installing dataRetrieval} -%------------------------------------------------------------ -The following command installs dataRetrieval and subsequent required packages: - -<>= -install.packages("dataRetrieval") -@ - -After installing the package, you need to open the library each time you re-start R. This is done with the simple command: -<>= -library(dataRetrieval) -@ - - -%------------------------------------------------------------ -\section{Creating Tables in Microsoft\textregistered\ Software from R} -\label{app:createWordTable} -%------------------------------------------------------------ -There are a few steps that are required in order to create a table in Microsoft\textregistered\ software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData: - -<>= -availableData <- whatNWISdata(siteNumber, "dv") -dailyData <- availableData["00003" == availableData$stat_cd,] - -tableData <- with(dailyData, - data.frame( - shortName=srsname, - Start=begin_date, - End=end_date, - Count=count_nu, - Units=parameter_units) - ) -@ - -First, save the data frame as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements): - - -<>= -write.table(tableData, file="tableData.tsv",sep="\t", - row.names = FALSE,quote=FALSE) -@ - -This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following: - -\begin{verbatim} -shortName Start End Count Units -Temperature, water 2010-10-01 2012-06-24 575 deg C -Stream flow, mean. daily 1948-01-01 2013-03-13 23814 ft3/s -Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C -Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l -Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day -\end{verbatim} - -Next, follow the steps below to open this file in Excel: -\begin{enumerate} -\item Open Excel -\item Click on the File tab -\item Click on the Open option -\item Navigate to the working directory (as shown in the results of \texttt{getwd()}) -\item Next to the File name text box, change the dropdown type to All Files (*.*) -\item Double click tableData.tsv -\item A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. -\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finished. -\item Use the many formatting tools within Excel to customize the table -\end{enumerate} - -From Excel, it is simple to copy and paste the tables in other Microsoft\textregistered\ software. An example using one of the default Excel table formats is here. Additional formatting could be requried in Excel, for example converting u to $\mu$. - -\begin{figure}[ht!] -\centering - \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} -\caption{A simple table produced in Microsoft\textregistered\ Excel.} -\label{overflow} -\end{figure} - -\clearpage - -%------------------------------------- -\section{Disclaimer} -%------------------------------------ -This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information. - - -\end{document} diff --git a/vignettes/figure/egretEx.pdf b/vignettes/figure/egretEx.pdf deleted file mode 100644 index 87e7eaa2..00000000 Binary files a/vignettes/figure/egretEx.pdf and /dev/null differ diff --git a/vignettes/figure/getNWISUnitPlot.pdf b/vignettes/figure/getNWISUnitPlot.pdf deleted file mode 100644 index 710116d8..00000000 Binary files a/vignettes/figure/getNWISUnitPlot.pdf and /dev/null differ diff --git a/vignettes/figure/getNWIStemperaturePlot-1.pdf b/vignettes/figure/getNWIStemperaturePlot-1.pdf deleted file mode 100644 index ed6227cf..00000000 Binary files a/vignettes/figure/getNWIStemperaturePlot-1.pdf and /dev/null differ diff --git a/vignettes/figure/getQWtemperaturePlot-1.pdf b/vignettes/figure/getQWtemperaturePlot-1.pdf deleted file mode 100644 index 8e78b6c4..00000000 Binary files a/vignettes/figure/getQWtemperaturePlot-1.pdf and /dev/null differ diff --git a/vignettes/figure/getQWtemperaturePlot.pdf b/vignettes/figure/getQWtemperaturePlot.pdf deleted file mode 100644 index 84f8397d..00000000 Binary files a/vignettes/figure/getQWtemperaturePlot.pdf and /dev/null differ