From 1c5899dfe64d9df4732e301acd869bd3cce6a4be Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Mon, 13 Jan 2025 16:38:22 -0800 Subject: [PATCH] part 1 chapter 1 & 4 updates and devel soilDB --- Part1/01-intro.Rmd | 633 +++++++++++++++++++------------------------ Part1/02-data.Rmd | 46 +--- Part1/04-spatial.Rmd | 560 +++++++++++++++++++------------------- Part1/packages.bib | 284 +++++++++++++++++++ Part1/sand.Rhistory | 25 -- 5 files changed, 862 insertions(+), 686 deletions(-) delete mode 100644 Part1/sand.Rhistory diff --git a/Part1/01-intro.Rmd b/Part1/01-intro.Rmd index f325e683..173acdd5 100644 --- a/Part1/01-intro.Rmd +++ b/Part1/01-intro.Rmd @@ -1,138 +1,131 @@ # Introduction to R {#intro} -![](static-figures/logo.jpg) +![](static-figures/logo.jpg) ## Outline -1. Course Overview - 1. Review Course Objectives - 2. Why is this training needed? - 3. Why is course organized this way? -2. What is R? - 1. Why should I use R? - 2. What can R do? -3. How do I get started? - 1. RStudio interface - 2. What are packages? - 3. How to navigate the Help tab - 4. How to save files -4. Manipulating data - 1. Loading & viewing data - 2. Filtering, transforming, merging, aggregating and reshaping data - 3. Exporting data - - +1. Course Overview + 1. Review Course Objectives + 2. Why is this training needed? + 3. Why is course organized this way? +2. What is R? + 1. Why should I use R? + 2. What can R do? +3. How do I get started? + 1. RStudio interface + 2. What are packages? + 3. How to navigate the Help tab + 4. How to save files +4. Manipulating data + 1. Loading & viewing data + 2. Filtering, transforming, merging, aggregating and reshaping data + 3. Exporting data + ## Course Overview ### Course Objectives -- Develop solutions to investigate soil survey correlation problems and update activities. -- Evaluate investigations for interpretive results and determine how to proceed. -- Summarize data for population in NASIS. 
-- Analyze spatial data to investigate soil-landscape relationships -- Help to pursue the question "why" - +- Develop solutions to investigate soil survey correlation problems and update activities. +- Evaluate investigations for interpretive results and determine how to proceed. +- Summarize data for population in NASIS. +- Analyze spatial data to investigate soil-landscape relationships +- Help to pursue the question "why" ### Why is this training needed? -- Long standing goal of the Soil Science Division to have a course in statistics [@mausbach2003] -- Opportunities to learn these techniques are limited, especially at the undergraduate level [@hennemann2004] -- Consistent methodology (data analysis, data population, sampling design, etc.) -- There is continually a greater need to use these techniques: - - Mapping of lands at high production rates ([@macmillan2007]; [@kempen2012]; [@brevik2016]) - - Ecological Sites [@maynard2019] - - Soil survey refinement (disaggregation) (@chaney2016];[@ramcharan2018]) - - +- Long standing goal of the Soil Science Division to have a course in statistics [@mausbach2003] +- Opportunities to learn these techniques are limited, especially at the undergraduate level [@hennemann2004] +- Consistent methodology (data analysis, data population, sampling design, etc.) +- There is continually a greater need to use these techniques: + - Mapping of lands at high production rates ([@macmillan2007]; [@kempen2012]; [@brevik2016]) + - Ecological Sites [@maynard2019] + - Soil survey refinement (disaggregation) ([@chaney2016];[@ramcharan2018]) ### Why is course organized this way? 
-- Our best judgment for assembling into **24** hours what could be **6** University level courses -- Mixture of slides and script enabled web pages is new for NRCS -- The web content is a long-term investment and should serve as a permanent reference -- Feel free to provide guidance for improving the class for future offerings - +- The web content is a long-term investment and serves as a reference +- Our best judgment for assembling into **24** hours what could be **6** University level courses +- Mixture of slides and script-enabled web pages is "new" for NRCS +Feel free to provide feedback for improving the class for future offerings. ## What is R? -R is a free, open-source software and programming language developed in 1995 at the University of Auckland as an environment for statistical computing and graphics [@ihaka1996]. Since then R has become one of the dominant software environments for data analysis and is used by a variety of scientific disiplines, including soil science, ecology, and geoinformatics ([Envirometrics CRAN Task View](https://cran.r-project.org/web/views/Environmetrics.html); [Spatial CRAN Task View](https://cran.r-project.org/web/views/Spatial.html)). R is particularly popular for its graphical capabilities, but it is also prized for it's GIS capabilities which make it relatively easy to generate raster-based models. More recently, R has also gained several packages which are designed specifically for analyzing soil data. +R is a free, open-source software and programming language developed in 1995 at the University of Auckland as an environment for statistical computing and graphics [@ihaka1996]. 
Since then R has become one of the dominant software environments for data analysis and is used by a variety of scientific disciplines, including soil science, ecology, and geoinformatics ([Environmetrics CRAN Task View](https://cran.r-project.org/web/views/Environmetrics.html); [Spatial CRAN Task View](https://cran.r-project.org/web/views/Spatial.html)). R is particularly popular for its graphical capabilities, but it is also prized for its GIS capabilities which make it relatively easy to generate raster-based models. More recently, R has also gained several packages designed specifically for analyzing soil data. -1. a software **environment**: - + statistics - + graphics - + programming - + calculator - + GIS -2. a **language** to explore, summarize, and model data - + functions = verbs - + objects = nouns +1. A software **environment**: + - statistics + - graphics + - programming + - calculator + - GIS +2. A **language** to explore, summarize, and model data + - functions = verbs + - objects = nouns
-![](static-figures/rproject.png){ width=80% } +![](static-figures/rproject.png){width="80%"}
- ### Why Should I Learn R? While the vast majority of people use Microsoft Excel for data analysis, R offers numerous advantages, such as: -1. Cost. R is free! [("Free as in free speech, not free beer.")](https://www.gnu.org/philosophy/free-sw.html) +1. Cost. R is free! [("Free as in free speech, not free beer.")](https://www.gnu.org/philosophy/free-sw.en.html) -2. [Reproducible Research](http://christophergandrud.github.io/RepResR-RStudio/) (*self-documenting, repeatable*) - + repeatable: - + code + output in a single document *('I want the right answer, not a quick answer' - Paul Finnell)* - + easier the next time ([humorous example](https://www.youtube.com/watch?v=s3JldKoA0zw)) - + numerous Excel horror stories of scientific studies gone wrong exist ([TED Talk](https://www.youtube.com/watch?v=dXKbkpilQME)) - + scalable: applicable to small or large problems +2. [Reproducible Research](http://christophergandrud.github.io/RepResR-RStudio/) (*self-documenting, repeatable*) -3. R in a Community - + [Numerous Discipline Specific R Groups](https://cran.r-project.org/web/views/) - + [Numerous Local R User Groups (including R-Ladies Groups)](https://jumpingrivers.github.io/meetingsR/r-user-groups.html#north-america) - + [Stack Overflow](https://stackoverflow.com/) + - repeatable: + - code + output in a single document *('I want the right answer, not a quick answer' - Paul Finnell)* + - easier the next time ([humorous example](https://www.youtube.com/watch?v=s3JldKoA0zw)) + - numerous Excel horror stories of scientific studies gone wrong exist ([TED Talk](https://www.youtube.com/watch?v=dXKbkpilQME)) + - scalable: applicable to small or large problems -4. Learning Resources *(quantity and quality)* - + [R books](https://www.r-project.org/doc/bib/R-books.html) - + [(Free Online) R Books](https://bookdown.org/) - -5. R is 'becoming' the new norm (paradigm shift?) 
"If we don't accept these challenges, others who are less qualified will; and soil scientists will be displaced by apathy." [@arnold1991] +3. R in a Community -While some people find the use of a commandline environment daunting, it is becoming a necessary skill for scientists as the volume and variety of data has grown. Thus scripting or programming has become a third language for many scientists, in addition to their native language and disipline specific terminology. Other popular programming languages include: SQL (i.e. NASIS), Python (i.e. ArcGIS), and JavaScript. + - [Numerous Discipline Specific R Groups](https://cran.r-project.org/web/views/) + - [Numerous Local R User Groups (including R-Ladies Groups)](https://jumpingrivers.github.io/meetingsR/r-user-groups.html#north-america) + - [Stack Overflow](https://stackoverflow.com/) -
+4. Learning Resources *(quantity and quality)* + + - [R books](https://www.r-project.org/doc/bib/R-books.html) + - [(Free Online) R Books](https://bookdown.org/) -![*ODBC and GDAL link R to nearly all possible formats/interfaces*](static-figures/triangle.png){ width=50% } +5. "If we don't accept these challenges, others who are less qualified will; and soil scientists will be displaced by apathy." [@arnold1991] + +While some people find the use of a command line environment daunting, it is becoming a necessary skill for scientists as the volume and variety of data has grown. Thus scripting or programming has become a third language for many scientists, in addition to their native language and discipline specific terminology. Other popular programming languages include: SQL (i.e. NASIS), Python (i.e. ArcGIS), and JavaScript.
+![*ODBC and GDAL link R to nearly all possible formats/interfaces*](static-figures/triangle.png){width="50%"} + +
### What can R do? ### Packages -* Base R (*functionality is extended through packages*) - + basic summaries of quantitative or qualitative data - + data exploration via graphics - + [GIS](https://cran.r-project.org/web/views/Spatial.html) data processing and analysis - -* Soil Science R Packages - + [aqp](https://github.com/ncss-tech/aqp) - visualization, aggregation, classification - + [soilDB](https://github.com/ncss-tech/soilDB) - access to commonly used soil databases - + [soilReports](https://github.com/ncss-tech/soilReports) - handful of report templates - + [soiltexture](http://soiltexture.r-forge.r-project.org/) - textural triangles - -* [Ecology](https://cran.r-project.org/web/views/Environmetrics.html) R packages - + [vegan](http://vegan.r-forge.r-project.org/) - ordination, diversity analysis, etc. - + [dismo](http://rspatial.org/sdm/) - species distribution modeling +- Base R (*functionality is extended through packages*) + - basic summaries of quantitative or qualitative data + - data exploration via graphics + - [GIS](https://cran.r-project.org/web/views/Spatial.html) data processing and analysis +- Soil Science R Packages + - [aqp](https://github.com/ncss-tech/aqp) - visualization, aggregation, classification + - [soilDB](https://github.com/ncss-tech/soilDB) - access to commonly used soil databases + - [soilReports](https://github.com/ncss-tech/soilReports) - handful of report templates + - [soiltexture](http://soiltexture.r-forge.r-project.org/) - textural triangles +- [Ecology](https://cran.r-project.org/web/views/Environmetrics.html) R packages + - [vegan](http://vegan.r-forge.r-project.org/) - ordination, diversity analysis, etc. 
+ - [dismo](http://rspatial.org/sdm/) - species distribution modeling #### Soil Science Applications ##### Create Maps -![](static-figures/ssurgo_timeline.png){ width=80% } - +![](static-figures/ssurgo_timeline.png){width="80%"} ##### Draw Soil Profiles @@ -164,10 +157,9 @@ hzdesgnname(loafercreek) <- "genhz" lc <- trunc(loafercreek[5:10,], 0, 115) par(mar = c(0, 0, 0, 0)) -plotSPC(lc, plot.depth.axis = FALSE, hz.depths = TRUE, name.style = 'center-center', hz.depths.offset = 0.08, fixLabelCollisions = TRUE, cex.names = 0.75, label = 'pedon_id') +plotSPC(lc, depth.axis = FALSE, hz.depths = TRUE, name.style = 'center-center', hz.depths.offset = 0.08, fixLabelCollisions = TRUE, cex.names = 0.75, label = 'upedonid') ``` - ##### Draw Depth Plots ```{r, purl=FALSE, echo=FALSE, eval=TRUE, fig.dim=c(10, 5)} @@ -198,7 +190,6 @@ ggplot(s, aes(x = top, y = p50)) + ``` - ##### Estimate the Range in Characteristics (RIC) ```{r, purl=FALSE, echo=FALSE, eval=TRUE} @@ -255,60 +246,57 @@ rownames(test) <- NULL knitr::kable(test[1:8,]) ``` - ## RStudio: An Integrated Development Environment (IDE) for R RStudio is an integrated development environment (IDE) that allows you to interact with R more readily. RStudio is similar to the standard RGui, but is considerably more user friendly. It has more drop-down menus, windows with multiple tabs, and many customization options. The first time you open RStudio, you will see three windows. A forth window is hidden by default, but can be opened by clicking the **File** drop-down menu, then **New File,** and then **R Script**. Detailed information on using RStudio can be found at at [RStudio's Website](https://support.rstudio.com/hc/en-us/sections/200107586-Using-RStudio). 
- -![](static-figures/ch1_rstudio2.png) +![](static-figures/ch1_rstudio2.png) +| RStudio Windows / Tabs | Location | Description | +|------------------------|-------------|--------------------------------------------------------------| +| Console Window | lower-left | location were commands are entered and the output is printed | +| Source Tabs | upper-left | built-in text editor | +| Environment Tab | upper-right | interactive list of loaded R objects | +| History Tab | upper-right | list of key strokes entered into the Console | +| Files Tab | lower-right | file explorer to navigate C drive folders | +| Plots Tab | lower-right | output location for plots | +| Packages Tab | lower-right | list of installed packages | +| Help Tab | lower-right | output location for help commands and help search window | +| Viewer Tab | lower-right | advanced tab for local web content | -RStudio Windows / Tabs | Location | Description | -------------------------|-------------|----------------------------------------------| -Console Window | lower-left | location were commands are entered and the output is printed | -Source Tabs | upper-left | built-in text editor | -Environment Tab | upper-right | interactive list of loaded R objects | -History Tab | upper-right | list of key strokes entered into the Console | -Files Tab | lower-right | file explorer to navigate C drive folders | -Plots Tab | lower-right | output location for plots | -Packages Tab | lower-right | list of installed packages | -Help Tab | lower-right | output location for help commands and help search window | -Viewer Tab | lower-right | advanced tab for local web content | + + + -## Rcmdr (R Commander): A Graphical User Interface for R + -While we recommend the use of RStudio for some of the reasons listed above, many people new to R (or infrequent users) might benefit from a graphical user interface (GUI) that allows the user to run basic functions using a point and click interface. 
+ -Luckily for beginners R has the R Commander (Rcmdr) GUI, which is similar to [JMP](https://www.jmp.com/en_us/learning-library/using-jmp.html). Rcmdr was created by [John Fox](http://socserv.socsci.mcmaster.ca/jfox/Misc/Rcmdr/index.html) for his introductory statistics students so they could see how the software worked without learning a large number of function names and arguments. Rcmdr is a great way to begin familiarizing yourself with R and statistics within a GUI environment. + -Regrettably, we know of no GUI that allows users to perform the majority of soil survey applications demonstrated in this course, and thus Rcmdr will not be covered. For those who wish to pursue Rcmdr, alternative instructions can be viewed at [Andy Chang & G. Jay Kerns website](http://gchang.people.ysu.edu/r/R_Instructions.htm). + -To take a quick peek at Rcmdr, it can be opened by **entering** the following command into the R console. + -```{r, purl=FALSE, echo=TRUE, eval=FALSE} -install.packages("Rcmdr") -library(Rcmdr) -``` + -![](static-figures/ch1_rcmdr.png) + +## R basics -## R basics +- R is command-line driven. It requires you to type or copy-and-paste commands after a command prompt (\>) that appears when you open R. This is called the "Read-Eval-Print-Loop" or REPL. After typing a command in the R console and pressing **Enter** on your keyboard, the command will run. -- R is command-line driven. It requires you to type or copy-and-paste commands after a command prompt (>) that appears when you open R. This is called the "Read-Eval-Print-Loop" or REPL. After typing a command in the R console and pressing **Enter** on your keyboard, the command will run. +- If your command is not complete, R issues a continuation prompt (signified by a plus sign: `+`). -- If your command is not complete, R issues a continuation prompt (signified by a plus sign: `+`). +- R is case sensitive. Make sure your spelling and capitalization are correct. -- R is case sensitive. 
Make sure your spelling and capitalization are correct. - -- Commands in R are also called functions. The basic format of a function in R is: `object <- function.name(argument_1 = data, argument_2 = TRUE)`. +- Commands in R are also called functions. The basic format of a function in R is: `object <- function.name(argument_1 = data, argument_2 = TRUE)`. -- The up arrow (^) on your keyboard can be used to bring up previous commands that you've typed in the R console. +- The up arrow (\^) on your keyboard can be used to bring up previous commands that you've typed in the R console. -- Comments in R code need to start with the `#` symbol (a.k.a. hash-tag, comment, pound, or number symbol). R ignores the remainder of the script line following `#`. +- Comments in R code need to start with the `#` symbol (a.k.a. hash-tag, comment, pound, or number symbol). R ignores the remainder of the script line following `#`. ```{r, eval=FALSE} # Math @@ -346,15 +334,13 @@ subgroup Packages are collections of additional functions that can be loaded on demand. They commonly include example data that can be used to demonstrate those functions. Although R comes with many common statistical functions and models, most of our work requires additional packages. - - ### Installing Packages -To use a package, you must first install it and then load it. These steps can be done at the command line or using the Packages Tab. Examples of both approaches are provided below. R packages only need to be installed once (until R is upgraded or re-installed). Every time you start a new R session, however, you need to load every package that you intend to use in that session. +To use a package, you must first install it and then load it. These steps can be done at the command line or using the Packages Tab. Examples of both approaches are provided below. R packages only need to be installed once (until R is upgraded or re-installed). 
Every time you start a new R session, however, you need to load every package that you intend to use in that session. -Within the **Packages** tab you will see a list of all the packages currently installed on your computer, and 2 buttons labeled either "Install" or "Update". To install a new package simply select the **Install** button. You can enter install one or more than one packages at a time by simply separating them with a comma. +Within the **Packages** tab you will see a list of all the packages currently installed on your computer, and 2 buttons labeled either "Install" or "Update". To install a new package simply select the **Install** button. You can enter install one or more than one packages at a time by simply separating them with a comma. -![](static-figures/ch1_package_window.png) +![](static-figures/ch1_package_window.png) ![](static-figures/ch1_install_example.png) @@ -366,9 +352,9 @@ library() # or installed.packages() -``` +``` -One useful package for soil scientists is the `soiltexture` package. It allows you to plot soil textural triangles. The following command shows how to install this package if you do not currently have it downloaded: +One useful package for soil scientists is the `soiltexture` package. It allows you to plot soil textural triangles. The following command shows how to install this package if you do not currently have it downloaded: ```{r} # CRAN (static version) @@ -376,39 +362,35 @@ install.packages(c("soiltexture")) # GitHub (development version) remotes::install_github("julienmoeys/soiltexture/pkg/soiltexture", dependencies = FALSE, upgrade = FALSE, build = FALSE) -``` - - +``` ### Loading Packages -Once a package is installed, it must be loaded into the R session to be used. -This can be done by using `library()`. The package name does not need to be quoted. +Once a package is installed, it must be loaded into the R session to be used. This can be done by using `library()`. The package name does not need to be quoted. 
```{r, eval=TRUE} library(soilDB) ``` -![](static-figures/ch1_load_packages.png){ width=60% } +![](static-figures/ch1_load_packages.png){width="60%"} - -You can also load packages using the **Packages Tab**, by **checking** the box next to the package name. For example, documentation for the soilDB package is available from the `help()` function. +You can also load packages using the **Packages Tab**, by **checking** the box next to the package name. For example, documentation for the soilDB package is available from the `help()` function. ```{r} help(package = "soilDB") -``` +``` -## Getting Help +## Getting Help -R has [extensive documentation](https://cran.r-project.org/manuals.html), numerous [mailing lists](https://www.r-project.org/mail.html), and [countless books](https://www.r-project.org/doc/bib/R-books.html) (many of which are free and listed at end of each chapter for this course). +R has [extensive documentation](https://cran.r-project.org/manuals.html), numerous [mailing lists](https://www.r-project.org/mail.html), and [countless books](https://www.r-project.org/doc/bib/R-books.html) (many of which are free and listed at end of each chapter for this course). To learn more about the function you are using and the options and arguments available, learn to help yourself by taking advantage of some of the following help functions in RStudio: -1. Use the Help tab in the lower-right Window to search commands (such as hist) or topics (such as histogram). +1. Use the Help tab in the lower-right Window to search commands (such as hist) or topics (such as histogram). ![](static-figures/ch1_help_tab.png) -2. Type `help(read.csv) or ?read.csv` in the Console window to bring up a help page. Results will appear in the Help tab in the lower right-hand window. Certain functions may require quotations, such as `help("+")`. +2. Type `help(read.csv) or ?read.csv` in the Console window to bring up a help page. 
Results will appear in the Help tab in the lower right-hand window. Certain functions may require quotations, such as `help("+")`. ```{r, eval=FALSE} # Help file for a function @@ -418,44 +400,43 @@ help(read.csv) # or ?read.csv help(package = "soiltexture") ``` +## Documenting your work -## Documenting your work +RStudio's Source Tabs serve as a built-in text editor. Prior to executing R functions at the Console, commands are typically written down (or scripted). Scripting is essentially showing your work. -RStudio's Source Tabs serve as a built-in text editor. Prior to executing R functions at the Console, commands are typically written down (or scripted). Scripting is essentially showing your work. - -The sequence of functions necessary to complete a task are scripted in order to document or automate a task. +The sequence of functions necessary to complete a task are scripted in order to document or automate a task. While scripting may seems cumbersome at first, it ultimately saves time in the long run, particularly for repetitive tasks ([humorous YouTube Video on Scripting](https://www.youtube.com/watch?v=s3JldKoA0zw)). Benefits include: -- allows others to reproduce your work, which is the foundation of science -- serves as instruction/reminder on how to perform a task -- allows rapid iteration, which saves time and allows the evaluation of incremental changes -- reduces the chance of human error +- allows others to reproduce your work, which is the foundation of science +- serves as instruction/reminder on how to perform a task +- allows rapid iteration, which saves time and allows the evaluation of incremental changes +- reduces the chance of human error ### Basic Tips for Scripting -To write a script, simply open a new R script file by clicking **File>New File>R Script**. Within the text editor **type** out a sequence of functions. +To write a script, simply open a new R script file by clicking **File\>New File\>R Script**. 
Within the text editor **type** out a sequence of functions. - - Place each function (e.g. `read.csv()`) on a separate line. - - If a function has a long list of arguments, place each argument on a separate line. - - A command can be excuted from the text editor by placing the cursor on a line and typing **Crtl + Enter**, or by **clicking** the Run button. - - An entire R script file can be excuted by **clicking** the Source button. +- Place each function (e.g. `read.csv()`) on a separate line. +- If a function has a long list of arguments, place each argument on a separate line. +- A command can be executed from the text editor by placing the cursor on a line and typing **Ctrl + Enter**, or by **clicking** the Run button. +- An entire R script file can be executed by **clicking** the Source button. ![](static-figures/ch1_text_editor.png) ### Comments It is a good idea to include comments in your code, so that in the future both yourself and others can understand what you were doing. Each line with a comment starts with `#`. -In RStudio, you can use `#` comments to create an "outline" for your source documents. Multiple `#` signs increase the depth of the hierarchy. Ending a comment line with four hyphens (`----`) indicates that text should be included in the outline. The source file outline using comments in regular _.R_ source files is analogous to the [Markdown](https://www.markdownguide.org/basic-syntax/) syntax used in R Markdown and Quarto for headers. +In RStudio, you can use `#` comments to create an "outline" for your source documents. Multiple `#` signs increase the depth of the hierarchy. Ending a comment line with four hyphens (`----`) indicates that text should be included in the outline. 
The source file outline using comments in regular *.R* source files is analogous to the [Markdown](https://www.markdownguide.org/basic-syntax/) syntax used in R Markdown and Quarto for headers. -For example, the following code block creates two outline sections, each with a nested subsection. +For example, the following code block creates two outline sections, each with a nested subsection. To show the outline view, click the "outline" button in the top-right hand corner of the source window. Paste it in a fresh R document to try it out. -```r +``` r # one ---- print("Section 1") @@ -472,40 +453,37 @@ print("Subsection 2.1) ## Organizing your work -When you first begin a project you should create a new folder and place within it all the data and code associated with the project. This simplifies the process of accessing your files from R. Using a project folder is also a good habit because it makes it easier to pickup where you left off and find your data if you need to come back to it later. Within R, your project folder is also known as your working directory. This directory will be the default location your plots and other R output are saved. +When you first begin a project you should create a new folder and place within it all the data and code associated with the project. This simplifies the process of accessing your files from R. Using a project folder is also a good habit because it makes it easier to pickup where you left off and find your data if you need to come back to it later. Within R, your project folder is also known as your working directory. This directory will be the default location your plots and other R output are saved. -You want to have inputs for your code in the working directory so that you can refer to them using [relative file paths](https://excelquick.com/r-programming/importing-data-absolute-and-relative-file-paths-in-r/). Relative file paths make it easier if you move the folder containing your script(s) around. 
Or, if you share it with someone else, they will have little issue getting your code to work on their own file system. +You want to have inputs for your code in the working directory so that you can refer to them using [relative file paths](https://excelquick.com/r-programming/importing-data-absolute-and-relative-file-paths-in-r/). Relative file paths make it easier if you move the folder containing your script(s) around. Or, if you share it with someone else, they will have little issue getting your code to work on their own file system. ### Setting the Working Directory Before you begin working in R, you should set your working directory to your project folder; for example, `setwd("C:\\workspace2\\projectx...")`. You can use RStudio to manage your projects and folders. -**NOTE:** _Beware when specifying any file paths_ that **R** uses forward slashes `/` instead of back slashes `\`. Back slashes are reserved for use as an [escape character](https://en.wikipedia.org/wiki/Escape_character), so you must use two of them to get one in result character string. +**NOTE:** *Beware when specifying any file paths* that **R** uses forward slashes `/` instead of back slashes `\`. Back slashes are reserved for use as an [escape character](https://en.wikipedia.org/wiki/Escape_character), so you must use two of them to get one in result character string. -To change the working directory in RStudio, select main menu **Session >> Set Working Directory >> ...**. Or, from the "Files" tab click **More >> Set As Working Directory** to use the _current location of the "Files" tab_ as your working directory. +To change the working directory in RStudio, select main menu **Session \>\> Set Working Directory \>\> ...**. Or, from the "Files" tab click **More \>\> Set As Working Directory** to use the *current location of the "Files" tab* as your working directory. 
![](static-figures/ch1_setwd.png) - Setting the working directory can also be done via the Console with the `setwd()` command: ```{r} setwd("C:/workspace2") ``` - To check the file path of the current working directory (which should now be `"C:\\workspace2"`), type: ```{r} getwd() ``` - ### RStudio Projects (.Rproj files) -You can also manage your working directory using RStudio Projects. An RStudio Project file (_.Rproj_) is analogous to, for example, a _.mxd_ file for ArcMap. It contains information about the specific settings you may have set for a "project". - -You open or create projects using the drop down menu in the top right-hand corner of the RStudio window (_shown below_) +You can also manage your working directory using RStudio Projects. An RStudio Project file (*.Rproj*) is analogous to, for example, a *.mxd* file for ArcMap. It contains information about the specific settings you may have set for a "project". + +You open or create projects using the drop down menu in the top right-hand corner of the RStudio window (*shown below*) ![RStudio Project Menu](static-figures/rstudio_projectdropdown.png) @@ -513,13 +491,13 @@ Here is what a typical Project drop-down menu looks like: ![RStudio Project Menu (expanded)](static-figures/rstudio_projectdropdown2.png) - * You can create new projects from existing or new directories with "New Project...". - - * When you click "Open Project...", your working directory is _automatically set to the .Rproj file's location_ -- this is _extremely_ handy +- You can create new projects from existing or new directories with "New Project...". 
- * Any projects you have created/used recently will show up in the "Project List" +- When you click "Open Project...", your working directory is *automatically set to the .Rproj file's location* -- this is *extremely* handy +- Any projects you have created/used recently will show up in the "Project List" +```{=html} +``` +## Saving your work +In R, you can save several types of files to keep track of the work you do. The file types include: workspace, script, history, and graphics. It is important to save often because R, like any other software, may crash periodically. -## Saving your work - -In R, you can save several types of files to keep track of the work you do. The file types include: workspace, script, history, and graphics. It is important to save often because R, like any other software, may crash periodically. - -Such problems are especially likely when working with large files. You can save your workspace in R via the command line or the File menu. +Such problems are especially likely when working with large files. You can save your workspace in R via the command line or the File menu. ```{r, purl=FALSE, echo=FALSE} @@ -554,31 +531,29 @@ knitr::kable(rfiles) ``` - #### R script (.R) -An R script is simply a text file of R commands that you've typed. +An R script is simply a text file of R commands that you've typed. -You may want to save your scripts (whether they were written in R Editor or another program such as Notepad) so that you can reference them in the future, edit them as needed, and keep track of what you've done. +You may want to save your scripts (whether they were written in R Editor or another program such as Notepad) so that you can reference them in the future, edit them as needed, and keep track of what you've done. -To save R scripts in RStudio, simply **click the save button** from your R script tab. Save scripts with the .R extension. +To save R scripts in RStudio, simply **click the save button** from your R script tab. 
Save scripts with the .R extension.

-R assumes that script files are saved with only that extension. If you are using another text editor, you won't need to worry about saving your scripts in R. You can open text files in the RStudio text editor, but beware copying and pasting from Word files as discussed below.
+R assumes that script files are saved with only that extension. If you are using another text editor, you won't need to worry about saving your scripts in R. You can open text files in the RStudio text editor, but beware copying and pasting from Word files as discussed below.

-![](static-figures/ch1_save_script.png)
+![](static-figures/ch1_save_script.png)

-To open an R script, **click the file icon**.
+To open an R script, **click the file icon**.

![](static-figures/ch1_file_icon.png)

-
#### Microsoft Word Files

Using Microsoft Word to write or save R scripts is generally a bad idea.

-Certain keyboard characters, such as quotations "", are not stored the same in Word (e.g. they are "left" and "right" handed). The difference is hard to distinguish, but will not run in R.
+Certain keyboard characters, such as quotations "", are not stored the same in Word (e.g. they are "left" and "right" handed). The difference is hard to distinguish, but will not run in R.

-Also, pasting your R code or output into Wword documents manually is not reproducible, so while it may work in a pinch, it ultimately costs you time.
+Also, pasting your R code or output into Word documents manually is not reproducible, so while it may work in a pinch, it ultimately costs you time.

You can use the `word_document` Rmarkdown template to automatically "Knit" `.docx` files from R code using a template, which is very handy for quickly getting a nice looking document!

@@ -588,6 +563,7 @@ R Markdown (.Rmd) documents contain information for the reproducible combination

This document is made in bookdown, a variant of rmarkdown used for book templates involving multiple chapters.
You can make blogs and websites for your R packages with blogdown and pkgdown. These are all tools based off of the powerful "pandoc" engine and the tools in the R Markdown ecosystem. +````{=html} +```` -#### R history (.Rhistory) +#### R history (.Rhistory) An R history file is a copy of all your key strokes. You can think of it as brute force way of saving your work. It can be useful if you didn't document all your steps in an R script file. -Like an R file, an Rhistory file is simply a text file that lists all of the commands that you've executed. It does not keep a record of the results. +Like an R file, an Rhistory file is simply a text file that lists all of the commands that you've executed. It does not keep a record of the results. To load or save your R history from the History Tab click the **Open File** or **Save** button. If you load an Rhistory file, your previous commands will again become available with the up-arrow and down-arrow keys. -![](static-figures/ch1_save_history.png) +![](static-figures/ch1_save_history.png) -You can also use the command line to load or save your history. +You can also use the command line to load or save your history. ```{r, eval=FALSE} savehistory(file = "sand.Rhistory") @@ -627,24 +604,31 @@ loadhistory(file = "sand.Rhistory") history(max.show=Inf) #displays all previous commands ``` -#### R Graphics +#### R Graphics + +Graphic outputs can be saved in various formats. -Graphic outputs can be saved in various formats. 
- ```{r, purl=FALSE, echo=FALSE, eval=TRUE} library(knitr) test <- data.frame( Format = c("pdf", "window metafile", "png", "jpeg", "bmp", "postscript"), - Function = c('pdf("graphic.pdf")', 'win.metafile("graphic.wmf")', 'png("graph.png")', 'jpeg("graph.jpg")', 'bmp("graph.bmp")', 'postscript("graph.ps")') + Function = c( + 'pdf("graphic.pdf")', + 'win.metafile("graphic.wmf")', + 'png("graph.png")', + 'jpeg("graph.jpg")', + 'bmp("graph.bmp")', + 'postscript("graph.ps")' ) +) kable(test) ``` -To save a graphic: (1) Click the **Plots Tab** window, (2) click the **Export** button, (3) **Choose** your desired format, (3) **Modify** the export settings as you desire, and (4) click **Save**. +To save a graphic: (1) Click the **Plots Tab** window, (2) click the **Export** button, (3) **Choose** your desired format, (3) **Modify** the export settings as you desire, and (4) click **Save**. -![](static-figures/ch1_save_plot.png) +![](static-figures/ch1_save_plot.png) -The R command for saving a graphic is: +The R command for saving a graphic is: ```{r} png(file = "npk_yield.png") @@ -652,75 +636,60 @@ plot(npk$yield) dev.off() ``` -The first line of this command creates a blank file named sand with a JPEG extension. The second line plots the data object that you want to create a graphic of (here it is conveniently the same name as the JPEG file we are creating). The third line closes the graphics device. - +The first line of this command creates a blank file named sand with a JPEG extension. The second line plots the data object that you want to create a graphic of (here it is conveniently the same name as the JPEG file we are creating). The third line closes the graphics device. ## Exercise 1 Using the examples discussed thus far as a guide, demonstrate your mastery of the material by performing the following tasks. -1. Create an R script file, demonstrate 3 basic R functions, and comment (`#`) your code. -2. 
Install the FedData R package from CRAN and [GitHub](https://github.com/ropensci/FedData). Save the commands in your R script file.
-3. Load the FedData R package and read the help file for the `get_ssurgo` function within the FedData package. What is the 1st input/argument? Save the R command in your R script.
-4. Save your R script, and forward to your instructor.
-
-
+1. Create an R script file, demonstrate 3 basic R functions, and comment (`#`) your code.
+2. Install the FedData R package from CRAN and [GitHub](https://github.com/ropensci/FedData). Save the commands in your R script file.
+3. Load the FedData R package and read the help file for the `get_ssurgo` function within the FedData package. What is the 1st input/argument? Save the R command in your R script.
+4. Save your R script, and forward to your instructor.

## Loading Data

-R can load a variety of data formats, however tabular data is by far the most common, and what we will spend of the majority of our time working with. Typically tabular data is stored in spreadsheets (e.g. .txt, .csv, .xlsx), databases (e.g. NASIS), or webpages (.html). Within R tabular data is stored as a `data.frame`.
-
-
+R can load a variety of data formats, however tabular data is by far the most common, and what we will spend the majority of our time working with. Typically tabular data is stored in spreadsheets (e.g. .txt, .csv, .xlsx), databases (e.g. NASIS), or webpages (.html). Within R tabular data is stored as a `data.frame`.

#### Text files

Text files are a preferable format for storing and transferring small datasets. One basic command for importing text files into R is `read.csv()`. The command is followed by the file name or URL and then some optional instructions for how to read the file.
-These files can either be imported into R by clicking the **Import Dataset >> From Text** buttons from the Environment tab, or by typing the following command into the R console: +These files can either be imported into R by clicking the **Import Dataset \>\> From Text** buttons from the Environment tab, or by typing the following command into the R console: ```{r, eval=FALSE} - # from working directory sand <- read.csv("C:/workspace2/sand_example.csv") - ``` ```{r, eval=TRUE} # from URL sand <- read.csv("https://raw.githubusercontent.com/ncss-tech/stats_for_soil_survey/master/data/sand_example.csv") - ``` -![](static-figures/sand_readcsv.png) - - +![](static-figures/sand_readcsv.png) #### Excel files -R can import Excel files, but generally speaking it is a bad idea to use Excel. Excel has a dangerous default which automatically converts data with common notations to their standard format without warning or notice. For example, the character "11-JUN" entered into a cell automatically becomes the date 6/11/2021, even though the data is still displayed as 11-JUN. The only way to avoid this default behavior is to manually import your data into Excel via the **Data Tab>Get External Data Ribbon**, and manually set the data type of all your columns to text. Failure to do so has resulted in numerous retracted research articles ([Washington Post Article](https://www.washingtonpost.com/news/wonk/wp/2016/08/26/an-alarming-number-of-scientific-papers-contain-excel-errors/)). Warnings aside, Excel files are a very common and are a format most people are familiar with. Therefore we will illustrate how to bring them into R. +R can import Excel files, but generally speaking it is a bad idea to use Excel. Excel has a dangerous default which automatically converts data with common notations to their standard format without warning or notice. 
For example, the character "11-JUN" entered into a cell automatically becomes the date 6/11/2021, even though the data is still displayed as 11-JUN. The only way to avoid this default behavior is to manually import your data into Excel via the **Data Tab\>Get External Data Ribbon**, and manually set the data type of all your columns to text. Failure to do so has resulted in numerous retracted research articles ([Washington Post Article](https://www.washingtonpost.com/news/wonk/wp/2016/08/26/an-alarming-number-of-scientific-papers-contain-excel-errors/)). Warnings aside, Excel files are a very common and are a format most people are familiar with. Therefore we will illustrate how to bring them into R.

-Download the sand Excel dataset from GitHub at [https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/Pre-course/R_sand/sand_example.xlsx](https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/Pre-course/R_sand/sand_example.xlsx)
+Download the sand Excel dataset from GitHub at <https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/Pre-course/R_sand/sand_example.xlsx>

-Excel datasets can either be imported into R by clicking the **Import Dataset >> From Excel** buttons from the Environment tab, or by typing the following command into the R console:
+Excel datasets can either be imported into R by clicking the **Import Dataset \>\> From Excel** buttons from the Environment tab, or by typing the following command into the R console:

```{r}
-
library(readxl)
sand_example <- read_excel("sand_example.xlsx")
-
```

-![](static-figures/sand_readxl.png)
-
-
+![](static-figures/sand_readxl.png)

#### NASIS (Web) Reports

NASIS provides a plethora of reports, many of which can be read into R for analysis. The `soilDB` R package provides a series of functions to read data from NASIS either using a local database connection or via HTML web reports. Similar functions also exist for accessing tabular data from Soil Data Access.
More details on `soilDB` will be provided in the next chapter, but now we'll illustrate how to access some example datasets for manipulating tabular data.

```{r, eval=FALSE}
-
library(soilDB)

# get projects
@@ -731,43 +700,38 @@ leg <- get_legend_from_NASISWebReport(mlraoffice = "Indi%", areasymbol = "%")

# get map units
mu <- get_mapunit_from_NASISWebReport(areasymbol = c("IN001", "IN11%"))
-
-
```
-
-
## Data manipulation

-Before we can do any sort of analysis, analysis, our data often needs to be manipulated one way or another. Estimates vary, but an analyst typically spend 80% of their time manipulating data, and only 20% actually analyzing or modeling. Tasks generally involve filtering, transforming, merging, aggregating, and reshaping data.
+Before we can do any sort of analysis, our data often needs to be manipulated one way or another. Estimates vary, but an analyst typically spends 80% of their time manipulating data, and only 20% actually analyzing or modeling. Tasks generally involve filtering, transforming, merging, aggregating, and reshaping data.

R has many functions and packages for manipulating data frames, but within the past several years a family of packages, known as the `tidyverse`, have been developed to simplify interacting with data frames (or tibbles). Within the `tidyverse` the most commonly used packages are `dplyr` and `tidyr`. Many of the tidyverse function names are patterned after SQL syntax.

+```{=html}

- 

+```
We will review the most common functions you need to know in order to accomplish the majority of data manipulation tasks.

-
### Viewing and Removing Data

Once a file is imported, it is imperative that you check to ensure that R correctly imported your data. Make sure numerical data are correctly imported as numerical, that your column headings are preserved, etc. To view the data simply **click** on the mu dataset listed in the Environment tab. This will open up a separate window that displays a spreadsheet like view.
![](static-figures/ch1_view_dataframe.png) - Additionally you can use the following functions to view your data in R. -Function | Description | -----------|-----------------------------------------------------| -`print()` | prints the entire object (avoid with large tables) | -`head()` | prints the first 6 lines of your data | -`str()` | shows the data structure of an R object | -`names()` | lists the column names (i.e., headers) of your data | -`ls()` | lists all the R objects in your workspace directory | +| Function | Description | +|-----------|-----------------------------------------------------| +| `print()` | prints the entire object (avoid with large tables) | +| `head()` | prints the first 6 lines of your data | +| `str()` | shows the data structure of an R object | +| `names()` | lists the column names (i.e., headers) of your data | +| `ls()` | lists all the R objects in your workspace directory | -Try entering the following commands to view the `mu` dataset in R: +Try entering the following commands to view the `mu` dataset in R: ```{r} str(mu) @@ -777,7 +741,7 @@ names(mu) head(mu) ls() -``` +``` A data object is anything you've created or imported and assigned a name to in R. The Environment tab allows you to see what data objects are in your R session and expand their structure. Right now sand should be the only data object listed. If you wanted to delete all data objects from your R session, you could **click the broom icon** from the Environments tab. Otherwise you could type: @@ -791,50 +755,44 @@ rm(mu, leg, sand) ![](static-figures/ch1_clear_workspace.png) - - ### Filtering or Subsetting Data When analyzing data in NASIS, filtering is typically accomplished by loading your selected set with only the records you're interested in. However, it is often useful or necessary to subset your data after it's loaded. This can allow you to isolate interesting records within large datasets. 
For these reasons R has numerous options/functions for filtering data. Data frames can be filtered by both columns and rows, using either **names**, **position** (e.g. column 1, row 5), or **logical** indices (e.g. `TRUE`/`FALSE`). Another particularly useful feature is the use of **pattern matching** which uses regular expressions to select data, which is similar to the `LIKE` statement from SQL. -**Filtering with names and numerical indices +\*\*Filtering with names and numerical indices ```{r} - # Filtering with names mu$areasymbol # select column names using $ mu[, c("areasymbol", "musym")] # select column names using [] mu[c("1", "2"), ] # select row names using [] mu[c("1", "2"), c("areasymbol", "musym")] # select column and row names using [] - # Filtering by position mu[1, ] # select first row mu[, 1] # select first column mu[2, 2] # select second row and second column mu[c(1, 2, 3), ] # select multiple rows mu[c(-1, -2), ] # drop multiple rows - - ``` **Logical Operators** - - `==` R uses a double equal sign as "equal-to" in SQL - - - `!=` "Not-equal-to" - - - `<, >, <=, >=` Less than, greater than, less than or equal to, and greater than or equal - - - `&` Equivalent to `AND` in SQL and Soil Taxonomy, must match both conditions - - - `|` Equivalent to `OR` in SQL and Soil Taxonomy, must match at least one condition - - - `%in%` Equivalent to `IN ()` in SQL (e.g. `mu$areasymbol %in% c("IN001", "IN111"`) - - - `grepl()` equivalent to `LIKE` in SQL (e.g. `grepl("IN%", mu$areasymbol)`) +- `==` R uses a double equal sign as "equal-to" in SQL + +- `!=` "Not-equal-to" + +- `<, >, <=, >=` Less than, greater than, less than or equal to, and greater than or equal + +- `&` Equivalent to `AND` in SQL and Soil Taxonomy, must match both conditions + +- `|` Equivalent to `OR` in SQL and Soil Taxonomy, must match at least one condition + +- `%in%` Equivalent to `IN ()` in SQL (e.g. `mu$areasymbol %in% c("IN001", "IN111"`) + +- `grepl()` equivalent to `LIKE` in SQL (e.g. 
`grepl("IN%", mu$areasymbol)`) **Filtering with logicals** @@ -848,7 +806,6 @@ mu[, names(mu) == "areasymbol"] # select columns that equal areasy mu[, names(mu) %in% c("areasymbol", "musym")] # select columns that match areasymbol and musym mu[grepl("Miami", mu$muname), ] # select rows that contain Miami - # Non-standard evaluation with tidyverse library(dplyr) @@ -865,48 +822,41 @@ select(mu, areasymbol, musym) # Slice rows slice(mu, 1:5) - ``` - - ### Transforming Data This allows you to create new columns by convert, compute, or combine data within existing columns. ```{r} - -mu <- mutate(mu, - # convert to hectares - muhectares = muacres * 0.4047, - # convert muname to TRUE or FALSE if Miami is present using pattern matching - miami = grepl("Miami", muname), - # compute % minor component - n_minor = n_component - n_majcompflag, - # combine columns - key = paste(areasymbol, musym) - ) - +mu <- mutate( + mu, + # convert to hectares + muhectares = muacres * 0.4047, + # convert muname to TRUE or FALSE if Miami is present using pattern matching + miami = grepl("Miami", muname), + # compute % minor component + n_minor = n_component - n_majcompflag, + # combine columns + key = paste(areasymbol, musym) +) ``` - ### Sorting Data Sorting allows you to rearrange your data. Beware R has several similar functions (e.g. `sort` and `order`) for sorting data only work with specific datatypes. The tidyverse function `arrange` is designed to work with data frames. ```{r} - # sort ascending arrange(mu, areasymbol, muname) # sort descending arrange(mu, desc(areasymbol), desc(muname)) - ``` ### Piping Data -Another particularly useful feature provided by the `magrittr` package and used in the `tidyverse` is the use of **pipe** (`%>%`). Base R also has a native pipe operator (`|>`). Using the RStudio keyboard shortcut Ctrl + Shift + M inserts the pipe you have selected as default in Global Options > Code. 
+Another particularly useful feature provided by the `magrittr` package and used in the `tidyverse` is the use of **pipe** (`%>%`). Base R also has a native pipe operator (`|>`). Using the RStudio keyboard shortcut Ctrl + Shift + M inserts the pipe you have selected as default in Global Options \> Code. `f(x,y)` becomes `x %>% f(y)` @@ -924,19 +874,15 @@ mu_sub <- mutate(filter(mu, areasymbol == "IN001"), pct_100less = pct_component mu_sub <- mu %>% filter(areasymbol == "IN001") %>% mutate(pct_100less = pct_component < 100) - ``` - - ### Merging/Joining or Combining Data -** Joining** +**Joining** -When working with tabular data you often have 2 or more tables you need to join. There are several ways to join tables. Which direction to join and which columns to join will determine how you achieve the join. +When working with tabular data you often have 2 or more tables you need to join. There are several ways to join tables. Which direction to join and which columns to join will determine how you achieve the join. ```{r joining} - # inner join leg_mu <- inner_join(leg, mu, by = c("liid", "areasymbol")) @@ -945,11 +891,9 @@ leg_mu <- left_join(leg, mu, by = c("liid")) # right_join leg_mu <- right_join(leg, mu, by = "liid") - ``` - -** Combining** +**Combining** If your tables have the same structure (e.g. columns), or length and order you may simply combine them. For example, if you have two different mapunit tables. @@ -967,14 +911,12 @@ cbind(mu, leg) # won't work ``` - ### Aggregating or Grouping Data Because soil data has multiple dimensions (e.g. properties and depths) and levels of organization (e.g. many to one relationships), it is often necessary to aggregate it. For example, when we wish to make a map we often need to aggregate over components and then map units. Depending on the data type this aggregation may involve taking a weighted average or selecting the dominant condition. 
The `group_by` function defines the groups over which we wish to `summarize` the data. - ```{r aggregating} mu_agg <- mu %>% @@ -985,14 +927,11 @@ mu_agg <- mu %>% ``` - - ### Reshaping Data Typically data is stored in what is known as a **wide** format, where each column contains a different variable (e.g. depth, clay, sand, rocks). However, sometimes it is necessary to reshape or pivot to a **long** format, where each variable/column is compressed into 2 new rows. One new column contains the old column names, while another new column contains the values from the old columns. This is particularly useful when combining multiple variables into a single plot. ```{r} - library(tidyr) # Simplify mu example dataset @@ -1008,97 +947,85 @@ print(mu2_long) # Pivot wide mu2_wide <- pivot_wider(mu2_long, names_from = name) print(mu2_wide) - ``` - - ### Exporting Data -To export data from R, use the command `write.csv()` or `write.dbf()` functions. Since we have already set our working directory, R automatically saves our file into the working directory. +To export data from R, use the command `write.csv()` or `write.dbf()` functions. Since we have already set our working directory, R automatically saves our file into the working directory. ```{r} - write.csv(mu_agg, file = "mu_agg.csv") library(foreign) write.dbf(as.data.frame(mu_agg), file = "mu_agg.dbf") - -``` +``` ## Exercise 2 -1. Create a new R script file. +1. Create a new R script file. -2. To get information from the NASIS legend table for the state of Wisconsin use the soilDB function `get_legend_from_NASISWebReport()` for `mlraoffice = "%"` and `areasymbol = "WI%"` +2. To get information from the NASIS legend table for the state of Wisconsin use the soilDB function `get_legend_from_NASISWebReport()` for `mlraoffice = "%"` and `areasymbol = "WI%"` -3. Filter the legend table for rows where the `ssastatus == "Out-of-date"` to find the soil survey areas that need update. 
Inspect the result to find the `areasymbol` values. +3. Filter the legend table for rows where the `ssastatus == "Out-of-date"` to find the soil survey areas that need update. Inspect the result to find the `areasymbol` values. -4. Load the mapunit table, using soilDB `get_mapunit_from_NASISWebReport()` using the area symbols you identified in step 3. +4. Load the mapunit table, using soilDB `get_mapunit_from_NASISWebReport()` using the area symbols you identified in step 3. -5. Calculate the acreage of hydric soils for each mapunit by multiplying `muacres` by `pct_hydric`. Note: `pct_hydric` is a percentage, not a proportion. +5. Calculate the acreage of hydric soils for each mapunit by multiplying `muacres` by `pct_hydric`. Note: `pct_hydric` is a percentage, not a proportion. -6. Aggregate the total acreage of hydric soils each soil survey area using dplyr functions `group_by()` and `summarize()`. +6. Aggregate the total acreage of hydric soils each soil survey area using dplyr functions `group_by()` and `summarize()`. -7. Join the aggregated mapunit table from Step 6 to the legend table from Step 3 using dplyr `left_join()`. +7. Join the aggregated mapunit table from Step 6 to the legend table from Step 3 using dplyr `left_join()`. -8. Calculate the proportion of the total soil survey area acres (`areaacres`) that are hydric soils. +8. Calculate the proportion of the total soil survey area acres (`areaacres`) that are hydric soils. -9. Answer the following questions: +9. Answer the following questions: + + - What soil survey areas need update in Wisconsin? + + - What proportion of those soil survey areas are hydric soils? + + - Bonus: How does your joined result in Step 7 differ if you replace dplyr `left_join()` with `inner_join()`? Why? - - What soil survey areas need update in Wisconsin? - - - What proportion of those soil survey areas are hydric soils? 
- - - Bonus: How does your joined result in Step 7 differ if you replace dplyr `left_join()` with `inner_join()`? Why? - 10. Save your R script and forward to your instructor. ## Review Given what you now know about R, try to answer the following questions: -1. Can you think of a situation where an existing hypothesis or conventional wisdom was not repeatable? - -2. What are packages? +1. Can you think of a situation where an existing hypothesis or conventional wisdom was not repeatable? -3. What is GitHub? +2. What are packages? -4. Where can you get help? +3. What is GitHub? -5. What is a data frame? - -6. What are 3 ways you can manipulate a data frame? +4. Where can you get help? +5. What is a data frame? +6. What are 3 ways you can manipulate a data frame? ## Additional Reading (Introduction) -* Introductory R Books - + [R for Data Science](https://r4ds.had.co.nz/index.html) - + [RStudio Cheatsheets](https://rstudio.com/resources/cheatsheets/) - + [Quick-R](https://www.statmethods.net/) - -* Advanced DSM R Books - + [Predictive Soil Mapping with R](https://envirometrix.github.io/PredictiveSoilMapping/) - + [Using R for Digital Soil Mapping (not free)](http://www.springer.com/us/book/9783319443256) - + [Soil Spectral Inference with R (not free)](https://github.com/AlexandreWadoux/soilspec) - + [GSP SOC Cookbook](https://fao-gsp.github.io/SOC-Mapping-Cookbook/) - + [GSP SAS Manual](https://fao-gsp.github.io/GSSmap) - -* Soil Science R Applications - + [aqp and soilDB tutorials](http://ncss-tech.github.io/AQP/) - + [ISRIC World Soil Information Example Training Courses](https://www.isric.org/utilise/capacity-building/training-courses#examplecourses) - + [ISRIC World Soil Information YouTube Channel](https://www.youtube.com/channel/UCNi1XYjdXWF9eAjvG40KqWg) - + [OpenGeoHub Courses](https://opengeohub.org/course) - + [OpenGeoHub YouTube Channel](https://www.youtube.com/channel/UC6HFFFYiV4zEYJlQMIXemWA/featured) - + [David Rossiter's Cornell 
Homepage](http://www.css.cornell.edu/faculty/dgr2/) - + [Pierre Roudier](https://pierreroudier.github.io/teaching/index.html) - -* Soil Sciences and Statistics Review Articles - + Arkely, R., 1976. Statistical Methods in Soil Classification Research. Advances in Agronomy 28:37-70. [https://www.sciencedirect.com/science/article/pii/S0065211308605520](https://www.sciencedirect.com/science/article/pii/S0065211308605520) - + Mausbach, M., and L. Wilding, 1991. Spatial Variability of Soils and Landforms. Soil Science Society of America, Madison. [https://dl.sciencesocieties.org/publications/books/tocs/sssaspecialpubl/spatialvariabil](https://dl.sciencesocieties.org/publications/books/tocs/sssaspecialpubl/spatialvariabil) - + Wilding, L., Smeck, N., and G. Hall, 1983. Spatial Variability and Pedology. In : L. Widling, N. Smeck, and G. Hall (Eds). Pedogenesis and Soil Taxonomy I. Conceps and Interactions. Elseiver, Amsterdam, pp. 83-116. [https://www.sciencedirect.com/science/article/pii/S0166248108705993](https://www.sciencedirect.com/science/article/pii/S0166248108705993) - - +- Introductory R Books + - [R for Data Science](https://r4ds.had.co.nz/index.html) + - [RStudio Cheatsheets](https://rstudio.com/resources/cheatsheets/) + - [Quick-R](https://www.statmethods.net/) +- Advanced DSM R Books + - [Predictive Soil Mapping with R](https://envirometrix.github.io/PredictiveSoilMapping/) + - [Using R for Digital Soil Mapping (not free)](http://www.springer.com/us/book/9783319443256) + - [Soil Spectral Inference with R (not free)](https://github.com/AlexandreWadoux/soilspec) + - [GSP SOC Cookbook](https://fao-gsp.github.io/SOC-Mapping-Cookbook/) + - [GSP SAS Manual](https://fao-gsp.github.io/GSSmap) +- Soil Science R Applications + - [aqp and soilDB tutorials](http://ncss-tech.github.io/AQP/) + - [ISRIC World Soil Information Example Training Courses](https://www.isric.org/utilise/capacity-building/training-courses#examplecourses) + - [ISRIC World Soil Information YouTube 
Channel](https://www.youtube.com/channel/UCNi1XYjdXWF9eAjvG40KqWg) + - [OpenGeoHub Courses](https://opengeohub.org/course) + - [OpenGeoHub YouTube Channel](https://www.youtube.com/channel/UC6HFFFYiV4zEYJlQMIXemWA/featured) + - [David Rossiter's Cornell Homepage](http://www.css.cornell.edu/faculty/dgr2/) + - [Pierre Roudier](https://pierreroudier.github.io/teaching/index.html) +- Soil Sciences and Statistics Review Articles + - Arkely, R., 1976. Statistical Methods in Soil Classification Research. Advances in Agronomy 28:37-70. + - Mausbach, M., and L. Wilding, 1991. Spatial Variability of Soils and Landforms. Soil Science Society of America, Madison. + - Wilding, L., Smeck, N., and G. Hall, 1983. Spatial Variability and Pedology. In : L. Widling, N. Smeck, and G. Hall (Eds). Pedogenesis and Soil Taxonomy I. Conceps and Interactions. Elseiver, Amsterdam, pp. 83-116. diff --git a/Part1/02-data.Rmd b/Part1/02-data.Rmd index 28d49167..15f969bb 100644 --- a/Part1/02-data.Rmd +++ b/Part1/02-data.Rmd @@ -740,30 +740,6 @@ We can use the `addmargins()` function to add the row and column sums to the mar addmargins(table(pedons$hzname, pedons$texcl)) ``` - - - - - - - - - - - - - - - - - - - - - -## Many Packages, Many Spatial Representations +## Packages for Vector Data ### The `sf` package -[Simple Features Access](https://www.ogc.org/standards/sfa) is a set of standards that specify a common storage and access model of geographic features. It is used mostly for two-dimensional geometries such as point, line, polygon, multi-point, multi-line, etc. +[Simple Features Access](https://www.ogc.org/standards/sfa) is a set of standards that specify a common storage and access model of geographic features. It is used mostly for two-dimensional geometries such as point, line, polygon, multi-point, multi-line, etc. This is one of many ways of modeling the geometry of shapes in the real world. 
This model happens to be widely adopted in the **R** ecosystem via the `sf` package, and very convenient for typical data encountered by soil survey operations.

@@ -257,52 +254,52 @@ Most of the sf package functions start with the prefix `st_`, such as: `st_crs()

#### `sf` vignettes

-You can the following `sf` package vignettes for details, sample data sets and usage of `sf` objects.
+You can read the following `sf` package vignettes for details, sample data sets and usage of `sf` objects. 

-1. [Simple Features for R](https://r-spatial.github.io/sf/articles/sf1.html)
+1. [Simple Features for R](https://r-spatial.github.io/sf/articles/sf1.html) 

-2. [Reading, Writing and Converting Simple Features](https://r-spatial.github.io/sf/articles/sf2.html)
+2. [Reading, Writing and Converting Simple Features](https://r-spatial.github.io/sf/articles/sf2.html) 

-3. [Manipulating Simple Feature Geometries](https://r-spatial.github.io/sf/articles/sf3.html)
+3. [Manipulating Simple Feature Geometries](https://r-spatial.github.io/sf/articles/sf3.html) 

-4. [Manipulating Simple Features](https://r-spatial.github.io/sf/articles/sf4.html)
+4. [Manipulating Simple Features](https://r-spatial.github.io/sf/articles/sf4.html) 

-5. [Plotting Simple Features](https://r-spatial.github.io/sf/articles/sf5.html)
+5. [Plotting Simple Features](https://r-spatial.github.io/sf/articles/sf5.html) 

-6. [Miscellaneous](https://r-spatial.github.io/sf/articles/sf6.html)
+6. [Miscellaneous](https://r-spatial.github.io/sf/articles/sf6.html) 

-7. [Spherical geometry in sf using s2geometry](https://r-spatial.github.io/sf/articles/sf7.html)
+7. [Spherical geometry in sf using s2geometry](https://r-spatial.github.io/sf/articles/sf7.html) 

### The `sp` Package

-The data structures ("classes") and functions provided by the [`sp`](https://cran.r-project.org/web/packages/sp/index.html) package have served a foundational role in the handling of spatial data in R for years. 
+The data structures ("classes") and functions provided by the [`sp`](https://cran.r-project.org/web/packages/sp/index.html) package have served a foundational role in the handling of spatial data in R for years. -Many of the following examples will reference names such as `SpatialPoints`, `SpatialPointsDataFrame`, and `SpatialPolygonsDataFrame`. These are specialized (S4) classes implemented by the `sp` package. +Many of the following examples will reference names such as `SpatialPoints`, `SpatialPointsDataFrame`, and `SpatialPolygonsDataFrame`. These are specialized (S4) classes implemented by the `sp` package. Objects of these classes maintain linkages between all of the components of spatial data. For example, a point, line, or polygon feature will typically be associated with: - -* coordinate geometry -* bounding box -* coordinate reference system -* attribute table + +- coordinate geometry +- bounding box +- coordinate reference system +- attribute table ### Converting `sp` and `sf` `sp` provides access to the same compiled code libraries (PROJ, GDAL, GEOS) through `sf` package. -For now the different package object types are interchangeable, and you may find yourself having to do this for a variety of reasons. You can convert between object types as needed using `sf::as_Spatial()` or `sf::st_as_sf()`. +For now the different package object types are interchangeable, and you may find yourself having to do this for a variety of reasons. You can convert between object types as needed using `sf::as_Spatial()` or `sf::st_as_sf()`. Check the documentation (`?functionname`) to figure out what object types different methods need as input; and check an input object's class with `class()` or `inherits()`. ### Importing / Exporting Vector Data -Import a feature class from a ESRI File Geodatabase or shape file. +Import a feature class from a ESRI File Geodatabase or shape file. 
-If you have a _.shp_ file, you can specify the whole path, including the file extension in the `dsn` argument, or just the folder. +If you have a *.shp* file, you can specify the whole path, including the file extension in the `dsn` argument, or just the folder. For a Geodatabase, you should specify the feature class using the `layer` argument. Note that a trailing "/" is omitted from the `dsn` (data source name) and the ".shp" suffix is omitted from the `layer`. -#### `sf` +#### `sf` ```{r eval=FALSE} x <- sf::st_read(dsn = 'E:/gis_data/ca630/FG_CA630_OFFICIAL.gdb', layer = 'ca630_a') @@ -324,14 +321,14 @@ The `sf` `st_read()` / `read_sf()` / `st_write()` / `write_sf()` functions have The `mapview` and `leaflet` packages make it possible to display interactive maps of `sf` objects in RStudio viewer pane, or within an HTML document generated via R Markdown (e.g. this document). -* [`mapview` package](https://github.com/r-spatial/mapview) - - [Basics](https://r-spatial.github.io/mapview/articles/mapview_01-basics.html) - - [Advanced Features](https://r-spatial.github.io/mapview/articles/mapview_02-advanced.html) - - See other "Articles" in this series, you can make [complex, interactive maps](https://r-spatial.github.io/mapview/articles/mapview_05-extras.html) using the `mapview` package. -* [`leaflet` package](https://rstudio.github.io/leaflet/) -* [`leafem`: 'leaflet' Extensions for 'mapview'](https://cran.r-project.org/web/packages/leafem/index.html) +- [`mapview` package](https://github.com/r-spatial/mapview) + - [Basics](https://r-spatial.github.io/mapview/articles/mapview_01-basics.html) + - [Advanced Features](https://r-spatial.github.io/mapview/articles/mapview_02-advanced.html) + - See other "Articles" in this series, you can make [complex, interactive maps](https://r-spatial.github.io/mapview/articles/mapview_05-extras.html) using the `mapview` package. 
+- [`leaflet` package](https://rstudio.github.io/leaflet/) +- [`leafem`: 'leaflet' Extensions for 'mapview'](https://cran.r-project.org/web/packages/leafem/index.html) -### Exercise 2: Map your favorite soil series extents +## Exercise 2: Map your favorite soil series extents The `seriesExtent` function in `soilDB` returns an `sf` object showing generalized extent polygons for a given soil series. @@ -358,7 +355,7 @@ cols <- c('royalblue', 'firebrick') mapview(s, zcol = 'series', col.regions = cols, legend = TRUE) ``` -The following code demonstrates how to fetch / convert / map soil series extents, using a vector of soil series names. +The following code demonstrates how to fetch / convert / map soil series extents, using a vector of soil series names. Results appear in the RStudio "Viewer" pane. Be sure to try the "Export" and "show in window" (next to the broom icon) buttons. @@ -389,22 +386,37 @@ cols <- RColorBrewer::brewer.pal(n = length(series.names), name = 'Set1') mapview(s, zcol = 'series', col.regions = cols, legend = TRUE) ``` -__Question: What do you notice about the areas where the extent polygons occur? Share your thoughts with your peers or mentor__ +**Question: What do you notice about the areas where the extent polygons occur? Share your thoughts with your peers or mentor** + +## Packages for Raster Data ### The `terra` Package The [`terra` package](https://cran.r-project.org/web/packages/terra/index.html) package provides most of the commonly used grid and vector processing functionality that one might find in a conventional GIS. It provides high-level data structures and functions for the GDAL (Geospatial Data Abstraction Library). 
- - * re-sampling / interpolation - * projection and warping (coordinate system transformations of gridded data) - * cropping, mosaicing, masking - * local and focal functions - * raster algebra - * contouring - * raster/vector conversions - * terrain analysis - * model-based prediction (more on this in Part 2) -#### Importing / Exporting Rasters + +- resampling ([`terra::resample()`](https://rspatial.github.io/terra/reference/resample.html)) + +- projection and warping ([`terra::project()`](https://rspatial.github.io/terra/reference/project.html)`)` + +- cropping, mosaicing, masking ([`terra::crop()`](https://rspatial.github.io/terra/reference/crop.html), [`terra::mosaic()`](https://rspatial.github.io/terra/reference/mosaic.html), [`terra::merge()`](https://rspatial.github.io/terra/reference/merge.html), [`terra::mask()`](https://rspatial.github.io/terra/reference/mask.html)) + +- local and focal functions ([`terra::local()`](https://rspatial.github.io/terra/reference/local.html), [`terra::focal()`](https://rspatial.github.io/terra/reference/focal.html)) + +- raster algebra (arithmetic operators, [`terra::xapp()`](https://rspatial.github.io/terra/reference/xapp.html)) + +- sampling ([`terra::spatSample()`](https://rspatial.github.io/terra/reference/spatSample.html)`)` + +- contouring ([`terra::contour()`](https://rspatial.github.io/terra/reference/contour.html)`)` + +- raster/vector conversions ([`terra::rasterize()`](https://rspatial.github.io/terra/reference/rasterize.html), [`terra::as.polygons()`](https://rspatial.github.io/terra/reference/as.polygons.html)) + +- terrain analysis ([`terra::terrain()`](https://rspatial.github.io/terra/reference/terrain.html)) + +- model-based prediction and interpolation ([`terra::predict()`](https://rspatial.github.io/terra/reference/predict.html), [`terra::interpolate()`](https://rspatial.github.io/terra/reference/interpolate.html); more on this in [Part 2](https://ncss-tech.github.io/stats_for_soil_survey/book2/index.html)) 
+ +#### Terra Example + +This is a brief demonstration using sample data files with terra. ```{r fig.width=6, fig.height=6, eval=TRUE} # use an example from the terra package @@ -419,15 +431,10 @@ r v <- terra::vect(g) v -# convert r to a RasterLayer object -r2 <- raster::raster(f) # show SpatRaster details print(r) -# show RasterLayer details -print(r2) - # default plot method plot(r) lines(v) @@ -446,30 +453,39 @@ The R object only stores a reference to the data until they are needed to be loa A more complete background on the capabilities of the `raster` package, and the replacement [`terra`](https://rspatial.org/terra/index.html), are described in the [*Spatial Data Science with R*](http://rspatial.org/) online book. -[*Introduction to the raster package*](https://cran.r-project.org/web/packages/raster/vignettes/Raster.pdf) vignette +```{r, eval = TRUE} +# convert r to a RasterLayer object +r2 <- raster::raster(f) + +# show RasterLayer details +print(r2) +``` + +[*Introduction to the raster package*](https://cran.r-project.org/web/packages/raster/vignettes/Raster.pdf) vignette ##### `stars` -There is also a package called [`stars`](https://r-spatial.github.io/stars/) (Spatiotemporal Arrays: Raster and Vector Datacubes) that is the `sf`-centric way of dealing with higher dimensional raster and vector "datacubes." Data cubes have dimensions related to time, spectral band, and sensor. The `stars` data structures are often used for processing satellite data sources. - +There is also a package called [`stars`](https://r-spatial.github.io/stars/) (Spatiotemporal Arrays: Raster and Vector Datacubes) that is the `sf`-centric way of dealing with higher dimensional raster and vector "datacubes." Data cubes have additional dimensions related to time, spectral band, or sensor type. The `stars` data structures are often used for processing spectral data sources from satellites. 
+ #### Related Links - * [`sf` package website](https://r-spatial.github.io/sf/) - * [rspatial.org - Spatial Data Science with R](https://rspatial.org/) - * [Goodbye PROJ.4 strings! How to specify a coordinate reference system in R?](https://inbo.github.io/tutorials/tutorials/spatial_crs_coding/) - +- [`sf` package website](https://r-spatial.github.io/sf/) +- [rspatial.org - Spatial Data Science with R](https://rspatial.org/) +- [Goodbye PROJ.4 strings! How to specify a coordinate reference system in R?](https://inbo.github.io/tutorials/tutorials/spatial_crs_coding/) + ### Converting Vector to Raster #### [`terra::rasterize()`](https://rspatial.github.io/terra/reference/rasterize.html) -#### [`raster::rasterize()`](https://rspatial.github.io/raster/reference/rasterize.html) - #### [`fasterize::fasterize()`](https://cran.r-project.org/web/packages/fasterize/vignettes/using-fasterize.html) +### Converting Raster to Vector + +#### [`terra::as.polygons()`](https://rspatial.github.io/terra/reference/as.polygons.html) ## Coordinate Reference Systems (CRS) -Spatial data aren't all that useful without an accurate description of the Coordinate Reference System (CRS). This type of information is typically stored within the ".prj" component of a shapefile, or in the header of a GeoTIFF. +Spatial data aren't all that useful without an accurate description of the Coordinate Reference System (CRS). This type of information is typically stored within the ".prj" component of a shapefile, or in the header of a GeoTIFF. Without a CRS it is not possible to perform coordinate transformations (e.g. conversion of geographic coordinates to projected coordinates), spatial overlay (e.g. intersection), or geometric calculations (e.g. distance or area). @@ -477,36 +493,41 @@ The "old" way (PROJ.4) of specifying coordinate reference systems is using chara Some common examples of coordinate system "EPSG" codes and their legacy "PROJ.4" strings. 
4 - - "EPSG" stands for European Petroleum Survey Group. The "EPSG Geodetic Parameter Dataset" is a public registry of geodetic datums, spatial reference systems, Earth ellipsoids, coordinate transformations and related units of measurement. - - - "OGC" refers to the Open Geospatial Consortium, which is an example of another important `authority:code`. "ESRI" (company that develops ArcGIS) also defines many CRS codes. - - - "PROJ" is the software responsible for transforming coordinates from one CRS to another. The current version of PROJ available is 9, and in PROJ > 6 major changes were made to the way that coordinate reference systems are defined and transformed led to the "PROJ.4" syntax falling out of favor. - - * _EPSG_: [`4326`](https://epsg.io/4326) / _PROJ.4_:`+proj=longlat +datum=WGS84` - geographic, WGS84 datum (NASIS Standard) - * _OGC_:[`CRS84`](http://defs.opengis.net/vocprez/object?uri=http://www.opengis.net/def/ogc/CRS84) - geographic, WGS84 datum (same as above but explicit longitude, latitude XY order) - * _EPSG_: [`4269`](https://epsg.io/4269) / _PROJ.4_:`+proj=longlat +datum=NAD83` - geographic, NAD83 datum - * _EPSG_: [`4267`](https://epsg.io/4267) / _PROJ.4_:`+proj=longlat +datum=NAD27` - geographic, NAD27 datum - * _EPSG_: [`26910`](https://epsg.io/26910) / _PROJ.4_:`+proj=utm +zone=10 +datum=NAD83` - projected (UTM zone 10), NAD83 datum - * _EPSG_: [`5070`](https://epsg.io/5070) / _PROJ.4_: `+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23.0 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs` - Albers Equal Area CONUS (gSSURGO) - - - More on the EPSG codes and specifics of CRS definitions: - - - https://spatialreference.org/ref/epsg/ - - - https://epsg.io/ - +- "EPSG" stands for European Petroleum Survey Group. The "EPSG Geodetic Parameter Dataset" is a public registry of geodetic datums, spatial reference systems, Earth ellipsoids, coordinate transformations and related units of measurement. 
+ +- "OGC" refers to the Open Geospatial Consortium, which is an example of another important `authority:code`. "ESRI" (company that develops ArcGIS) also defines many CRS codes. + +- "PROJ" is the software responsible for transforming coordinates from one CRS to another. The current version of PROJ available is 9, and in PROJ \> 6 major changes were made to the way that coordinate reference systems are defined and transformed led to the "PROJ.4" syntax falling out of favor. + +- *EPSG*: [`4326`](https://epsg.io/4326) / *PROJ.4*:`+proj=longlat +datum=WGS84` - geographic, WGS84 datum (NASIS Standard) + +- *OGC*:[`CRS84`](http://defs.opengis.net/vocprez/object?uri=http://www.opengis.net/def/ogc/CRS84) - geographic, WGS84 datum (same as above but explicit longitude, latitude XY order) + +- *EPSG*: [`4269`](https://epsg.io/4269) / *PROJ.4*:`+proj=longlat +datum=NAD83` - geographic, NAD83 datum + +- *EPSG*: [`4267`](https://epsg.io/4267) / *PROJ.4*:`+proj=longlat +datum=NAD27` - geographic, NAD27 datum + +- *EPSG*: [`26910`](https://epsg.io/26910) / *PROJ.4*:`+proj=utm +zone=10 +datum=NAD83` - projected (UTM zone 10), NAD83 datum + +- *EPSG*: [`5070`](https://epsg.io/5070) / *PROJ.4*: `+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23.0 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs` - Albers Equal Area CONUS (gSSURGO) + +- More on the EPSG codes and specifics of CRS definitions: + + - + + - + While you may encounter PROJ.4 strings, these are no longer considered the preferred method of referencing Coordinate Reference Systems -- and, in general, newer methods are "easier." -Well-known text (WKT) is a human- machine-readable standard format for geometry, so storing the Coordinate Reference System information in a similar format makes sense. This format is returned by the `sf::st_crs()` method. +Well-known text (WKT) is a human- machine-readable standard format for geometry, so storing the Coordinate Reference System information in a similar format makes sense. 
This format is returned by the `sf::st_crs()` method.

-For example: the WKT representation of `EPSG:4326`:
+For example: the WKT representation of `EPSG:4326`: 

```{r, eval=TRUE}
st_crs(4326)
```

-This is using the [OGC WKT CRS standard](https://www.ogc.org/standards/wkt-crs). Adoption of this standard caused some significant changes in packages in the R ecosystem.
+This is using the [OGC WKT CRS standard](https://www.ogc.org/standards/wkt-crs). Adoption of this standard caused some significant changes in packages in the R ecosystem. 

So you can get familiar, what follows are several examples of doing the same thing: setting the CRS of spatial objects with WGS84 longitude/latitude geographic coordinates. If you have another target coordinate system, it is just a matter of using the correct codes to identify it.

@@ -542,7 +563,7 @@ s.utm <- st_transform(x = s, crs = 26910)
s.nad27 <- st_transform(x = s, crs = st_crs(4267))
```

-#### `sp`
+#### `sp` 

You can do the same thing several different ways with `sp` objects.

An equivalent EPSG, OGC and PROJ.4 can be set or get using `proj4string<-`/`proj4string` and either a `sp` `CRS` object or a PROJ.4 string for `Spatial` objects.

@@ -581,20 +602,20 @@ terra::crs(r)
terra::crs(r) <- terra::crs("OGC:CRS84")
```

-"Transforming" or "warping" a raster is a different from with a vector as it requires interpolation of pixels to a target resolution and CRS.
+"Transforming" or "warping" a raster is different from transforming a vector, as it requires interpolation of pixels to a target resolution and CRS. 

-The method provided by `terra` is `project()` and in `raster` it is `projectRaster()`.
+The method provided by `terra` is `project()` and in `raster` it is `projectRaster()`. 

-It works the same as the above "transform" methods in that you specify an object to transform, and the target reference system or a template for the object. 
+It works the same as the above "transform" methods in that you specify an object to transform, and the target reference system or a template for the object. ```{r, eval=FALSE} t.wgs84 <- terra::project(r, terra::crs("+proj=igh")) r.wgs84 <- raster::projectRaster(raster::raster(r), crs = CRS("+proj=igh")) ``` -Note that the default warping of raster uses _bilinear interpolation_ (`method='bilinear'`), which is appropriate for _continuous variables_. +Note that the default warping of raster uses *bilinear interpolation* (`method='bilinear'`), which is appropriate for *continuous variables*. -You also have the option of using nearest-neighbor (`method='ngb'`) for _categorical variables_ (class maps) where interpolation would not make sense. +You also have the option of using nearest-neighbor (`method='ngb'`) for *categorical variables* (class maps) where interpolation would not make sense. If we want to save this transformed raster to file, we can use something like this for `terra` @@ -656,11 +677,11 @@ unzip( ## Load Example MLRA Data -We will be using polygons associated with MLRA 15 and 18 as part of this demonstration. +We will be using polygons associated with MLRA 15 and 18 as part of this demonstration. -Import these data with `sf::st_read()`. +Import these data with `sf::st_read()`. -```{r results='hide', eval=TRUE} +```{r results='hide', eval=TRUE} # load MLRA polygons mlra <- sf::st_read(file.path(ch4.data.path, 'mlra-18-15-AEA.shp')) @@ -670,7 +691,7 @@ mlra <- sf::st_read(file.path(ch4.data.path, 'mlra-18-15-AEA.shp')) We will load the sample MLRA 15 and 18 (California) raster data (PRISM derived) using `terra::rast()`. If using your own MLRA, you will need to update file paths to use your own rasters. 
-```{r results='hide', eval=TRUE} +```{r results='hide', eval=TRUE} # mean annual air temperature, Deg C maat <- terra::rast(file.path(ch4.data.path, 'MAAT.tif')) @@ -702,19 +723,68 @@ rs plot(rs) ``` +## Vector Data + +### `sf` + +```{r, eval=TRUE} +p <- sf::st_as_sf(data.frame(x = -120, y = 37.5), + coords = c("x", "y"), + crs = 4326) +p.aea <- st_transform(p, "EPSG:5070") +``` + +In `sf` the functions used to do this are `st_intersects()` or `st_intersection()`. -## Raster data +```{r, eval=TRUE} +st_intersects(p.aea, mlra) +st_intersection(p.aea, mlra) +``` + +### `terra` + +```{r, eval=TRUE} +p <- terra::vect(data.frame(x = -120, y = 37.5), + geom = c("x", "y"), + crs = "EPSG:4326") +p.aea <- project(p, "EPSG:5070") +``` + +In `terra` the functions used to determine the intersection is `relate()`. + +```{r, eval=TRUE} +mlra[relate(vect(mlra), p.aea, relation = "intersects"), +] +``` + +### `sp` + +In `sp` objects, you do these operations with the `sp::over()` function. Access the associated vignette by pasting `vignette("over")` in the console when the `sp` package is loaded. + +```{r, eval=FALSE} +# hand make a SpatialPoints object +# note that this is GCS +p <- SpatialPoints(coords = cbind(-120, 37.5), + proj4string = CRS('+proj=longlat +datum=WGS84')) + +mlra.sp <- sf::as_Spatial(mlra) +# spatial extraction of MLRA data requires a CRS transformation +p.aea <- spTransform(p, proj4string(mlra.sp)) +over(p.aea, mlra.sp) +``` + +## Raster Data ### Object Properties `SpatRaster` and `RasterLayer` objects are similar to `sf`, `sp` and other R spatial objects in that they keep track of the linkages between data, coordinate reference system, and optional attribute tables. 
Getting and setting the contents of raster objects should be performed using functions such as: - -* `terra::NAflag(r)` / `raster::NAvalue(r)`: get / set the NODATA value -* `terra::crs(r)` / `raster::wkt(r)` : get / set the coordinate reference system -* `terra::res(r)` / `raster::res(r)`: get / set the resolution -* `terra::ext(r)` / `raster::extent(r)`: get / set the extent -* `terra::datatype(r)` / `raster::dataType(r)`: get / set the data type -* ... many more, see the [`raster`](https://rspatial.github.io/raster/) and [`terra`](https://rspatial.github.io/terra/) package manuals + +- `terra::NAflag(r)` / `raster::NAvalue(r)`: get / set the NODATA value +- `terra::crs(r)` / `raster::wkt(r)` : get / set the coordinate reference system +- `terra::res(r)` / `raster::res(r)`: get / set the resolution +- `terra::ext(r)` / `raster::extent(r)`: get / set the extent +- `terra::datatype(r)` / `raster::dataType(r)`: get / set the data type +- ... many more, see the [`raster`](https://rspatial.github.io/raster/) and [`terra`](https://rspatial.github.io/terra/) package manuals ### Rasters "In Memory" v.s. "File-Based" @@ -724,7 +794,7 @@ With the `raster` package, the initial file/disk-based reference can be converte ### Writing Rasters to File -Exporting data requires consideration of the output format, datatype, encoding of NODATA, and other options such as compression. +Exporting data requires consideration of the output format, datatype, encoding of NODATA, and other options such as compression. With terra, "LZW" compression is used by default when writing GeoTIFF files. Using the `gdal` argument e.g.: `terra::writeRaster(..., gdal=)` is equivalent to specifying `option` argument to `raster::writeRaster()`. @@ -733,8 +803,8 @@ With terra, "LZW" compression is used by default when writing GeoTIFF files. 
Usi terra::writeRaster(t.wgs84, filename = 't.wgs84.tif') ``` -For example, a `RasterLayer` object that you wanted to save to disk as an internally-compressed GeoTIFF: - +For example, a `RasterLayer` object that you wanted to save to disk as an internally-compressed GeoTIFF: + ```{r eval=FALSE} # using previous example data set raster::writeRaster(r.wgs84, filename = 'r.tif', options = c("COMPRESS=LZW")) @@ -744,14 +814,14 @@ raster::writeRaster(r.wgs84, filename = 'r.tif', options = c("COMPRESS=LZW")) Commonly used raster `datatype` include: "unsigned integer", "signed integer", and "floating point" of variable precision. - * `INT1U`: integers from 0 to 255 - * `INT2U`: integers from 0 to 65,534 - * `INT2S`: integers from -32,767 to 32,767 - * `INT4S`: integers from -2,147,483,647 to 2,147,483,647 - * `FLT4S`: floating point from -3.4e+38 to 3.4e+38 - * `FLT8S`: floating point from -1.7e+308 to 1.7e+308 +- `INT1U`: integers from 0 to 255 +- `INT2U`: integers from 0 to 65,534 +- `INT2S`: integers from -32,767 to 32,767 +- `INT4S`: integers from -2,147,483,647 to 2,147,483,647 +- `FLT4S`: floating point from -3.4e+38 to 3.4e+38 +- `FLT8S`: floating point from -1.7e+308 to 1.7e+308 -It is wise to manually specify an output `datatype` that will "just fit" the required precision. +It is wise to manually specify an output `datatype` that will "just fit" the required precision. For example, if you have generated a `RasterLayer` that warrants integer precision and ranges from 0 to 100, then the `INT1U` data type would provide enough precision to store all possible values *and* the NODATA value. Raster data stored as integers will always be smaller (sometimes 10-100x) than those stored as floating point, especially when internal compression is enabled. @@ -767,9 +837,9 @@ terra::writeRaster(t.wgs84, filename = 'r.tif', datatype = 'FLT4S') #### Notes on Compression -It is often a good idea to create internally-compressed raster data. 
+It is often a good idea to create internally-compressed raster data. -The [GeoTiff format](https://gdal.org/drivers/raster/gtiff.html) can accommodate many different compression algorithms, including lossy (JPEG) compression. Usually, the default "LZW" or "DEFLATE" compression will result in significant savings, especially for data encoded as integers. +The [GeoTiff format](https://gdal.org/drivers/raster/gtiff.html) can accommodate many different compression algorithms, including lossy (JPEG) compression. Usually, the default "LZW" or "DEFLATE" compression will result in significant savings, especially for data encoded as integers. For example, the CONUS gSSURGO map unit key grid at 30m resolution is about 55Gb (GeoTiff, no compression) vs. 2.4Gb after LZW compression. @@ -782,57 +852,7 @@ raster::writeRaster(r, filename='r.tif', options=c("COMPRESS=NONE")) terra::writeRaster(t.wgs84, filename='r.tif', gdal=c("COMPRESS=DEFLATE", "PREDICTOR=2", "ZLEVEL=9") ``` -See [this article](https://kokoalberti.com/articles/geotiff-compression-optimization-guide/) for some ideas on optimization of file read/write times and associated compressed file sizes. - -## Vector Data - -### `sf` - -```{r, eval=TRUE} -p <- sf::st_as_sf(data.frame(x = -120, y = 37.5), - coords = c("x", "y"), - crs = 4326) -p.aea <- st_transform(p, "EPSG:5070") -``` - -In `sf` the functions used to do this are `st_intersects()` or `st_intersection()`. - -```{r, eval=TRUE} -st_intersects(p.aea, mlra) -st_intersection(p.aea, mlra) -``` - -### `terra` -```{r, eval=TRUE} -p <- terra::vect(data.frame(x = -120, y = 37.5), - geom = c("x", "y"), - crs = "EPSG:4326") -p.aea <- project(p, "EPSG:5070") -``` - -In `terra` the functions used to determine the intersection is `relate()`. - -```{r, eval=TRUE} -mlra[relate(vect(mlra), p.aea, relation = "intersects"), -] -``` - -### `sp` - -In `sp` objects, you do these operations with the `sp::over()` function. 
Access the associated vignette by pasting `vignette("over")` in the console when the `sp` package is loaded. - -```{r, eval=FALSE} -# hand make a SpatialPoints object -# note that this is GCS -p <- SpatialPoints(coords = cbind(-120, 37.5), - proj4string = CRS('+proj=longlat +datum=WGS84')) - -mlra.sp <- sf::as_Spatial(mlra) -# spatial extraction of MLRA data requires a CRS transformation -p.aea <- spTransform(p, proj4string(mlra.sp)) -over(p.aea, mlra.sp) -``` - +See [this article](https://kokoalberti.com/articles/geotiff-compression-optimization-guide/) for some ideas on optimization of file read/write times and associated compressed file sizes. ## Spatial Operations @@ -841,18 +861,18 @@ Spatial data are lot more useful when "related" (overlay, intersect, spatial que ### Working with Vector and Raster Data Typically, spatial queries of raster data by geometry features (point, line, polygon) are performed in two ways: - - 1. For each geometry, collect all pixels that overlap (`exactextractr` approach) - - 2. For each geometry, collect a sample of pixels defined by [sampling points](http://ncss-tech.github.io/AQP/sharpshootR/sample-vs-population.html) -The first method ensures that all data are included in the analysis, however, processing can be slow for multiple/detailed rasters, and the results may not fit into memory. +1. For each geometry, collect all pixels that overlap (`exactextractr` approach) + +2. For each geometry, collect a sample of pixels defined by [sampling points](http://ncss-tech.github.io/AQP/sharpshootR/sample-vs-population.html) + +The first method ensures that all data are included in the analysis, however, processing can be slow for multiple/detailed rasters, and the results may not fit into memory. -The second method is more efficient (10-100x faster), requires less memory, and can remain statistically sound--as long as a reasonable sampling strategy is applied. Sampling may also help you avoid low-acreage "anomalies" in the raster product. 
More on sampling methods in the next chapter.

The `extract()` function can perform several operations in one call, such as buffering (in projected units) with `buffer` argument. See the manual page for an extensive listing of optional arguments and what they do.

-Sampling and extraction with `terra` the results in a `SpatVector` object. Sampling and extraction with `raster` methods results in a `matrix` object.
+Sampling and extraction with `terra` results in a `SpatVector` object. Sampling and extraction with `raster` methods results in a `matrix` object. 

```{r, eval=TRUE}
# sampling single layer SpatRaster
terra::spatSample(maat, size = 10)

# sampling multi-layer SpatRaster
terra::spatSample(rs, size = 10)
```
-
```{r fig.width=8, fig.height=5, eval=TRUE}
par(mfcol = c(1, 2), mar = c(1, 1, 3, 1))
@@ -915,7 +934,7 @@ mean(x.regular$MAAT, na.rm = TRUE)
mean(x.random$MAAT, na.rm = TRUE)
```

-Just how much variation can we expect when collecting 100, randomly-located samples over such a large area?
+Just how much variation can we expect when collecting 100, randomly-located samples over such a large area? 

```{r}
# 10 replications of samples of n=100
@@ -947,11 +966,11 @@ plot(maat, main = 'PRISM Mean Annual Air Temperature (deg C)')
plot(sf::st_geometry(mlra.gcs), main = 'MLRA 15 and 18', add = TRUE)
```

-### Exercise 3: Extracting Raster Data
+## Exercise 3: Extracting Raster Data

-#### Raster Summary By Point: NASIS Pedon Locations
+### Raster Summary By Point: NASIS Pedon Locations

-Extract PRISM data at the coordinates associated with NASIS pedons that have been correlated to the [Loafercreek](https://casoilresource.lawr.ucdavis.edu/sde/?series=loafercreek) series. 
+Extract PRISM data at the coordinates associated with NASIS pedons that have been correlated to the [Loafercreek](https://casoilresource.lawr.ucdavis.edu/sde/?series=loafercreek) series. We will use the sample dataset `loafercreek` from the `soilDB` package to get NASIS data. This example can be easily adapted to your own pedon data extracted from NASIS using [`fetchNASIS()`](http://ncss-tech.github.io/soilDB/reference/fetchNASIS.html), but if your points are not in California, you will need to supply your own raster data. @@ -968,7 +987,7 @@ pedons <- loafercreek # extract site data s <- sf::st_as_sf(aqp::site(pedons), - coords = c("x_std", "y_std"), + coords = c("longstddecimaldegrees", "latstddecimaldegrees"), crs = 4326, na.fail = FALSE) ``` @@ -987,7 +1006,7 @@ e <- terra::extract(rs, s2, df = TRUE) summary(e[, -1]) ``` -Join the extracted PRISM data with the original `SoilProfileCollection` object. +Join the extracted PRISM data with the original `SoilProfileCollection` object. ```{r, eval=TRUE} # combine site data (sf) with extracted raster values (data.frame), row-order is identical, result is sf @@ -995,7 +1014,7 @@ res <- cbind(s, e) # extract unique IDs and PRISM data # dplyr verbs work with sf data.frames -res2 <- dplyr::select(res, pedon_id, MAAT, MAP, FFD, GDD, rain_fraction, effective_precipitation) +res2 <- dplyr::select(res, upedonid, MAAT, MAP, FFD, GDD, rain_fraction, effective_precipitation) # join with original SoilProfileCollection object via pedon_key site(pedons) <- res2 @@ -1012,7 +1031,7 @@ my_rv_function <- function(x) median(x, na.rm = TRUE) my_high_function <- function(x) quantile(x, probs = 0.95, na.rm = TRUE) site(pedons) |> - dplyr::select(pedon_id, MAAT, MAP, FFD, GDD, + dplyr::select(upedonid, MAAT, MAP, FFD, GDD, rain_fraction, effective_precipitation) |> dplyr::summarize(dplyr::across( MAAT:effective_precipitation, @@ -1022,10 +1041,9 @@ site(pedons) |> )) ``` -#### Raster Summary By Polygon: Series Extent +### Raster Summary By 
Polygon: Series Extent -The [`seriesExtent()`](http://ncss-tech.github.io/AQP/soilDB/series-extent.html) function from the `soilDB` package provides a simple interface to [Series Extent Explorer](https://casoilresource.lawr.ucdavis.edu/see/) data files. -Note that these series extents have been generalized for rapid display at regional to continental scales. A more precise representation of "series extent" can be generated from SSURGO polygons and queried from [SDA](http://ncss-tech.github.io/AQP/soilDB/SDA-tutorial-2.html). +The [`seriesExtent()`](http://ncss-tech.github.io/AQP/soilDB/series-extent.html) function from the `soilDB` package provides a simple interface to [Series Extent Explorer](https://casoilresource.lawr.ucdavis.edu/see/) data files. Note that these series extents have been generalized for rapid display at regional to continental scales. A more precise representation of "series extent" can be generated from SSURGO polygons and queried from [SDA](http://ncss-tech.github.io/AQP/soilDB/SDA-tutorial-2.html). Get an approximate extent for the [Loafercreek](http://casoilresource.lawr.ucdavis.edu/sde/?series=loafercreek) soil series from [SEE](https://casoilresource.lawr.ucdavis.edu/see/#amador). See the `seriesExtent` [tutorial](http://ncss-tech.github.io/AQP/soilDB/series-extent.html) and [manual page](http://ncss-tech.github.io/soilDB/reference/seriesExtent.html) for additional options and related functions. @@ -1101,7 +1119,7 @@ summary(e[,-1]) knitr::kable(cor(e[,-1]), digits = 2) ``` -Quickly compare the two sets of samples. +Quickly compare the two sets of samples. ```{r fig.width=6, fig.height=4, eval=TRUE} # compile results into a list @@ -1129,16 +1147,16 @@ boxplot( Basic climate summaries from a standardized source (e.g. PRISM) might be a useful addition to an OSD, or checking the ranges reported in mapunits. 
-#### Raster Summary By Polygon: MLRA +### Raster Summary By Polygon: MLRA The following example is a simplified version of what is available in the [`soilReports`](https://github.com/ncss-tech/soilReports) package, reports on the [ncss-tech](https://github.com/ncss-tech/soil-pit/tree/master/reports) GitHub repository. Efficient summary of large raster data sources can be accomplished using: - * internally-compressed raster data sources, stored on a local disk, can be in any coordinate system - * polygons stored in an equal-area or UTM coordinate system, with CRS units of meters - * [fixed-density sampling](http://ncss-tech.github.io/AQP/sharpshootR/sample-vs-population.html) of polygons - * estimation of quantiles from collected raster samples +- internally-compressed raster data sources, stored on a local disk, can be in any coordinate system +- polygons stored in an equal-area or UTM coordinate system, with CRS units of meters +- [fixed-density sampling](http://ncss-tech.github.io/AQP/sharpshootR/sample-vs-population.html) of polygons +- estimation of quantiles from collected raster samples Back to our example data. The first step is to check the MLRA polygons (`mlra`); how many features per MLRA symbol? Note that some MLRA have more than one polygon. @@ -1280,9 +1298,9 @@ densityplot(~ value | group, data = ex.all, ### Example: Summarizing MLRA Raster Data with `lattice` graphics -Lattice graphics are useful for summarizing grouped comparisons. +Lattice graphics are useful for summarizing grouped comparisons. -The syntax is difficult to learn and remember, but there is a lot of [documentation online](http://www.statmethods.net/advgraphs/trellis.html). +The syntax is difficult to learn and remember, but there is a lot of [documentation online](http://www.statmethods.net/advgraphs/trellis.html). 
```{r, fig.width=8, fig.height=4, eval=TRUE} library(lattice) @@ -1310,6 +1328,7 @@ bwplot(mlra ~ value | name, data = m, # setup plot and data so ) ``` +````{=html} +```` ## Additional Reading (Spatial) -* Ahmed, Zia. 2020. [Geospatial Data Science with R](https://zia207.github.io/geospatial-r-github.io/index.html). - -* Gimond, M., 2019. Intro to GIS and Spatial Analysis [https://mgimond.github.io/Spatial/](https://mgimond.github.io/Spatial/) - -* Hijmans, R.J. 2019. Spatial Data Science with R. [https://rspatial.org/](https://rspatial.org/) - -* Lovelace, R., J. Nowosad, and J. Muenchow, 2019. Geocomputation with R. CRC Press. [https://bookdown.org/robinlovelace/geocompr/](https://bookdown.org/robinlovelace/geocompr/) - -* Pebesma, E., and R.S. Bivand. 2005. Classes and methods for spatial data: The sp package. [https://cran.r-project.org/web/packages/sp/vignettes/intro_sp.pdf](https://cran.r-project.org/web/packages/sp/vignettes/intro_sp.pdf). +- Ahmed, Zia. 2020. [Geospatial Data Science with R](https://zia207.github.io/geospatial-r-github.io/index.html). -* Pebesma, E. and R. Bivand, 2019. Spatial Data Science. [https://keen-swartz-3146c4.netlify.com/](https://keen-swartz-3146c4.netlify.com/) +- Gimond, M., 2019. Intro to GIS and Spatial Analysis -* [Applied Spatial Data Analysis with R](http://www.asdar-book.org/) +- Hijmans, R.J. 2019. Spatial Data Science with R. +- Lovelace, R., J. Nowosad, and J. Muenchow, 2019. Geocomputation with R. CRC Press. +- Pebesma, E., and R.S. Bivand. 2005. Classes and methods for spatial data: The sp package. . +- Pebesma, E. and R. Bivand, 2019. Spatial Data Science. 
+- [Applied Spatial Data Analysis with R](http://www.asdar-book.org/) diff --git a/Part1/packages.bib b/Part1/packages.bib index 2049097c..8e0c6b9a 100644 --- a/Part1/packages.bib +++ b/Part1/packages.bib @@ -1,3 +1,11 @@ +@Manual{R-aqp, + title = {aqp: Algorithms for Quantitative Pedology}, + author = {Dylan Beaudette and Pierre Roudier and Andrew Brown}, + note = {R package version 2.1.0}, + url = {https://github.com/ncss-tech/aqp}, + year = {2025}, +} + @Manual{R-base, title = {R: A Language and Environment for Statistical Computing}, author = {{R Core Team}}, @@ -15,6 +23,63 @@ @Manual{R-bookdown url = {https://github.com/rstudio/bookdown}, } +@Manual{R-circular, + title = {circular: Circular Statistics}, + author = {Ulric Lund and Claudio Agostinelli}, + year = {2024}, + note = {R package version 0.5-1}, + url = {https://CRAN.R-project.org/package=circular}, +} + +@Manual{R-clhs, + title = {clhs: Conditioned Latin Hypercube Sampling}, + author = {Pierre Roudier}, + year = {2021}, + note = {R package version 0.9.0}, + url = {https://github.com/pierreroudier/clhs/}, +} + +@Manual{R-diagram, + title = {diagram: Functions for Visualising Simple Graphs (Networks), Plotting +Flow Diagrams}, + author = {Karline Soetaert}, + year = {2020}, + note = {R package version 1.6.5}, + url = {https://CRAN.R-project.org/package=diagram}, +} + +@Manual{R-dplyr, + title = {dplyr: A Grammar of Data Manipulation}, + author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan}, + year = {2023}, + note = {R package version 1.1.4}, + url = {https://dplyr.tidyverse.org}, +} + +@Manual{R-exactextractr, + title = {exactextractr: Fast Extraction from Raster Datasets using Polygons}, + author = {{Daniel Baston}}, + year = {2023}, + note = {R package version 0.10.0}, + url = {https://isciences.gitlab.io/exactextractr/}, +} + +@Manual{R-GGally, + title = {GGally: Extension to ggplot2}, + author = {Barret Schloerke and Di Cook and Joseph Larmarange and 
Francois Briatte and Moritz Marbach and Edwin Thoen and Amos Elberg and Jason Crowley}, + year = {2024}, + note = {R package version 2.2.1}, + url = {https://ggobi.github.io/ggally/}, +} + +@Manual{R-ggplot2, + title = {ggplot2: Create Elegant Data Visualisations Using the Grammar of Graphics}, + author = {Hadley Wickham and Winston Chang and Lionel Henry and Thomas Lin Pedersen and Kohske Takahashi and Claus Wilke and Kara Woo and Hiroaki Yutani and Dewey Dunnington and Teun {van den Brand}}, + year = {2024}, + note = {R package version 3.5.1}, + url = {https://ggplot2.tidyverse.org}, +} + @Manual{R-knitr, title = {knitr: A General-Purpose Package for Dynamic Report Generation in R}, author = {Yihui Xie}, @@ -23,6 +88,30 @@ @Manual{R-knitr url = {https://yihui.org/knitr/}, } +@Manual{R-lattice, + title = {lattice: Trellis Graphics for R}, + author = {Deepayan Sarkar}, + year = {2024}, + note = {R package version 0.22-6}, + url = {https://lattice.r-forge.r-project.org/}, +} + +@Manual{R-mapview, + title = {mapview: Interactive Viewing of Spatial Data in R}, + author = {Tim Appelhans and Florian Detsch and Christoph Reudenbach and Stefan Woellauer}, + year = {2023}, + note = {R package version 2.11.2}, + url = {https://github.com/r-spatial/mapview}, +} + +@Manual{R-rJava, + title = {rJava: Low-Level R to Java Interface}, + author = {Simon Urbanek}, + year = {2024}, + note = {R package version 1.0-11}, + url = {http://www.rforge.net/rJava/}, +} + @Manual{R-rmarkdown, title = {rmarkdown: Dynamic Documents for R}, author = {JJ Allaire and Yihui Xie and Christophe Dervieux and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone}, @@ -31,6 +120,89 @@ @Manual{R-rmarkdown url = {https://github.com/rstudio/rmarkdown}, } +@Manual{R-sf, + title = {sf: Simple Features for R}, + author = {Edzer Pebesma}, + year = {2024}, + note = {R package version 1.0-19}, + url = 
{https://r-spatial.github.io/sf/}, +} + +@Manual{R-shape, + title = {shape: Functions for Plotting Graphical Shapes, Colors}, + author = {Karline Soetaert}, + year = {2024}, + note = {R package version 1.4.6.1}, + url = {https://CRAN.R-project.org/package=shape}, +} + +@Manual{R-sharpshootR, + title = {sharpshootR: A Soil Survey Toolkit}, + author = {Dylan Beaudette and Jay Skovlin and Stephen Roecker and Andrew Brown}, + note = {R package version 2.3.2}, + url = {https://github.com/ncss-tech/sharpshootR}, + year = {2025}, +} + +@Manual{R-soilDB, + title = {soilDB: Soil Database Interface}, + author = {Dylan Beaudette and Jay Skovlin and Stephen Roecker and Andrew Brown}, + note = {R package version 2.8.7}, + url = {https://github.com/ncss-tech/soilDB/}, + year = {2025}, +} + +@Manual{R-soilReports, + title = {soilReports: R Markdown Reports and Convenience Functions for Soil Survey}, + author = {USDA-NRCS Soil Survey Staff}, + note = {R package version 0.8.2}, + url = {https://github.com/ncss-tech/soilReports}, + year = {2025}, +} + +@Manual{R-sp, + title = {sp: Classes and Methods for Spatial Data}, + author = {Edzer Pebesma and Roger Bivand}, + year = {2024}, + note = {R package version 2.1-4}, + url = {https://github.com/edzer/sp/}, +} + +@Manual{R-spcosa, + title = {spcosa: Spatial Coverage Sampling and Random Sampling from Compact +Geographical Strata}, + author = {Dennis Walvoort and Dick Brus and Jaap {de Gruijter}}, + year = {2023}, + note = {R package version 0.4-2}, + url = {https://git.wur.nl/Walvo001/spcosa}, +} + +@Manual{R-terra, + title = {terra: Spatial Data Analysis}, + author = {Robert J. 
Hijmans},
+  year = {2024},
+  note = {R package version 1.8-6},
+  url = {https://rspatial.org/},
+}
+
+@Manual{R-tidyr,
+  title = {tidyr: Tidy Messy Data},
+  author = {Hadley Wickham and Davis Vaughan and Maximilian Girlich},
+  year = {2024},
+  note = {R package version 1.3.1},
+  url = {https://tidyr.tidyverse.org},
+}
+
+@Article{aqp2013,
+  title = {Algorithms for quantitative pedology: A toolkit for soil scientists},
+  author = {D.E. Beaudette and P. Roudier and A.T. O'Geen},
+  journal = {Computers \& Geosciences},
+  year = {2013},
+  volume = {52},
+  pages = {258-268},
+  url = {http://dx.doi.org/10.1016/j.cageo.2012.10.020},
+}
+
 @Book{bookdown2016,
   title = {bookdown: Authoring Books and Technical Documents with {R} Markdown},
   author = {Yihui Xie},
@@ -41,6 +213,28 @@ @Book{bookdown2016
+@Manual{circular2024,
+  title = {{R} package \texttt{circular}: Circular Statistics (version 0.5-1)},
+  author = {Claudio Agostinelli and Ulric Lund},
+  year = {2024},
+  url = {https://CRAN.R-project.org/package=circular},
+}
+
+@Manual{clhs2011,
+  title = {clhs: a R package for conditioned Latin hypercube sampling.},
+  author = {Pierre Roudier},
+  year = {2011},
+}
+
+@Book{ggplot22016,
+  author = {Hadley Wickham},
+  title = {ggplot2: Elegant Graphics for Data Analysis},
+  publisher = {Springer-Verlag New York},
+  year = {2016},
+  isbn = {978-3-319-24277-4},
+  url = {https://ggplot2.tidyverse.org},
+}
+
 @Book{knitr2015,
   title = {Dynamic Documents with {R} and knitr},
   author = {Yihui Xie},
@@ -62,6 +256,16 @@ @InCollection{knitr2014
   note = {ISBN 978-1466561595},
 }
 
+@Book{lattice2008,
+  title = {Lattice: Multivariate Data Visualization with R},
+  author = {Deepayan Sarkar},
+  year = {2008},
+  publisher = {Springer},
+  address = {New York},
+  isbn = {978-0-387-75968-5},
+  url = {http://lmdvr.r-forge.r-project.org},
+}
+
 @Book{rmarkdown2018,
   title = {R Markdown: The Definitive Guide},
   author = {Yihui Xie and J.J. 
Allaire and Garrett Grolemund}, @@ -82,3 +286,83 @@ @Book{rmarkdown2020 url = {https://bookdown.org/yihui/rmarkdown-cookbook}, } +@Book{sf2023, + author = {Edzer Pebesma and Roger Bivand}, + title = {{Spatial Data Science: With applications in R}}, + year = {2023}, + publisher = {{Chapman and Hall/CRC}}, + url = {https://r-spatial.org/book/}, + doi = {10.1201/9780429459016}, +} + +@Article{sf2018, + author = {Edzer Pebesma}, + title = {{Simple Features for R: Standardized Support for Spatial Vector Data}}, + year = {2018}, + journal = {{The R Journal}}, + doi = {10.32614/RJ-2018-009}, + url = {https://doi.org/10.32614/RJ-2018-009}, + pages = {439--446}, + volume = {10}, + number = {1}, +} + +@Article{sp2005, + author = {Edzer J. Pebesma and Roger Bivand}, + title = {Classes and methods for spatial data in {R}}, + journal = {R News}, + year = {2005}, + volume = {5}, + number = {2}, + pages = {9--13}, + month = {November}, + url = {https://CRAN.R-project.org/doc/Rnews/}, +} + +@Book{sp2013, + author = {Roger S. Bivand and Edzer Pebesma and Virgilio Gomez-Rubio}, + title = {Applied spatial data analysis with {R}, Second edition}, + year = {2013}, + publisher = {Springer, NY}, + url = {https://asdar-book.org/}, +} + +@Book{spcosa2006, + title = {Sampling for Natural Resource Monitoring}, + author = {J. {de Gruijter} and D. Brus and M. Bierkens and M. Knotters}, + publisher = {Springer}, + address = {Berlin}, + year = {2006}, + doi = {10.1007/3-540-33161-1}, + isbn = {978-3-540-22486-0}, +} + +@Article{spcosa1999, + title = {A sampling scheme for estimating the mean extractable phosphorus concentration of fields for environmental regulation}, + author = {D. J. Brus and L. E. E. M. Spatjens and J. J. 
{de Gruijter}}, + journal = {Geoderma}, + year = {1999}, + volume = {89}, + pages = {129--148}, + doi = {10.1016/S0016-7061(98)00123-2}, +} + +@Article{spcosa2010, + title = {An R package for spatial coverage sampling and random sampling from compact geographical strata by k-means}, + author = {D. J. J. Walvoort and D. J. Brus and J. J. {de Gruijter}}, + journal = {Computers \& Geosciences}, + year = {2010}, + volume = {36}, + pages = {1261--1267}, + doi = {10.1016/j.cageo.2010.04.005}, +} + +@Book{spcosa2022, + title = {Spatial sampling with R}, + author = {D. J. Brus}, + publisher = {CRC Press}, + year = {2022}, + doi = {10.1201/9781003258940}, + url = {https://github.com/DickBrus/SpatialSamplingwithR}, +} + diff --git a/Part1/sand.Rhistory b/Part1/sand.Rhistory deleted file mode 100644 index 5b37e84d..00000000 --- a/Part1/sand.Rhistory +++ /dev/null @@ -1,25 +0,0 @@ -library(gitcreds) -library(credentials) -install.package("credentials") -install.packages("credentials") -library(credentials) -set_github_pat() -bookdown:::serve_book() -data("loafercreek") -n <- c("A", -"BAt", -"Bt1", -"Bt2", -"Cr", -"R") -# REGEX rules -p <- c("A", -"BA|AB", -"Bt|Bw", -"Bt3|Bt4|2B|C", -"Cr", -"R") -# Compute genhz labels and add to loafercreek dataset -loafercreek$genhz <- generalize.hz(loafercreek$hzname, n, p) -plot(loafercreek[1:5,], color = "genhz", label = "pedon_id") -bookdown:::serve_book()