From b54590ba57f24b88129c746ab363580d77d13d70 Mon Sep 17 00:00:00 2001 From: Andrew Gene Brown Date: Mon, 29 Jan 2024 21:54:58 -0800 Subject: [PATCH] build book --- book/data.html | 794 +++++----- book/eda.html | 732 +++++----- book/reference-keys.txt | 4 +- .../figure-html/unnamed-chunk-103-1.png | Bin 30100 -> 0 bytes .../figure-html/unnamed-chunk-104-1.png | Bin 42813 -> 0 bytes .../figure-html/unnamed-chunk-105-1.png | Bin 34740 -> 0 bytes .../figure-html/unnamed-chunk-106-1.png | Bin 54534 -> 30100 bytes ...hunk-103-2.png => unnamed-chunk-106-2.png} | Bin .../figure-html/unnamed-chunk-107-1.png | Bin 38897 -> 42813 bytes .../figure-html/unnamed-chunk-108-1.png | Bin 71177 -> 34740 bytes .../figure-html/unnamed-chunk-109-1.png | Bin 44539 -> 54534 bytes .../figure-html/unnamed-chunk-110-1.png | Bin 54081 -> 38897 bytes ...hunk-107-2.png => unnamed-chunk-110-2.png} | Bin .../figure-html/unnamed-chunk-111-1.png | Bin 0 -> 71177 bytes .../figure-html/unnamed-chunk-112-1.png | Bin 0 -> 44539 bytes .../figure-html/unnamed-chunk-113-1.png | Bin 16074 -> 54081 bytes .../figure-html/unnamed-chunk-116-1.png | Bin 0 -> 16074 bytes ...hunk-140-1.png => unnamed-chunk-143-1.png} | Bin .../figure-html/unnamed-chunk-163-1.png | Bin 491864 -> 0 bytes ...hunk-161-1.png => unnamed-chunk-164-1.png} | Bin .../figure-html/unnamed-chunk-166-1.png | Bin 36351 -> 492388 bytes .../figure-html/unnamed-chunk-169-1.png | Bin 0 -> 36351 bytes ...hunk-166-2.png => unnamed-chunk-169-2.png} | Bin ...hunk-170-1.png => unnamed-chunk-173-1.png} | Bin 96875 -> 96884 bytes .../figure-html/unnamed-chunk-175-1.png | Bin 106917 -> 0 bytes .../figure-html/unnamed-chunk-177-1.png | Bin 20596 -> 0 bytes .../figure-html/unnamed-chunk-178-1.png | Bin 0 -> 106825 bytes .../figure-html/unnamed-chunk-180-1.png | Bin 0 -> 20524 bytes .../figure-html/unnamed-chunk-186-1.png | Bin 42514 -> 0 bytes ...hunk-185-1.png => unnamed-chunk-188-1.png} | Bin .../figure-html/unnamed-chunk-189-1.png | Bin 44926 -> 42514 
bytes .../figure-html/unnamed-chunk-192-1.png | Bin 0 -> 44926 bytes ...-chunk-59-1.png => unnamed-chunk-62-1.png} | Bin 43176 -> 42989 bytes ...-chunk-60-1.png => unnamed-chunk-63-1.png} | Bin ...-chunk-76-1.png => unnamed-chunk-79-1.png} | Bin ...-chunk-76-2.png => unnamed-chunk-79-2.png} | Bin ...-chunk-77-1.png => unnamed-chunk-80-1.png} | Bin book/sampling.html | 332 ++--- book/search_index.json | 2 +- book/spatial.html | 1296 ++++++++--------- 40 files changed, 1623 insertions(+), 1537 deletions(-) delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-103-1.png delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-104-1.png delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-105-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-103-2.png => unnamed-chunk-106-2.png} (100%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-107-2.png => unnamed-chunk-110-2.png} (100%) create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-111-1.png create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-112-1.png create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-116-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-140-1.png => unnamed-chunk-143-1.png} (100%) delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-163-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-161-1.png => unnamed-chunk-164-1.png} (100%) create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-169-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-166-2.png => unnamed-chunk-169-2.png} (100%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-170-1.png => unnamed-chunk-173-1.png} (83%) delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-175-1.png delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-177-1.png create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-178-1.png create mode 100644 
book/s4ssbook_files/figure-html/unnamed-chunk-180-1.png delete mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-186-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-185-1.png => unnamed-chunk-188-1.png} (100%) create mode 100644 book/s4ssbook_files/figure-html/unnamed-chunk-192-1.png rename book/s4ssbook_files/figure-html/{unnamed-chunk-59-1.png => unnamed-chunk-62-1.png} (71%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-60-1.png => unnamed-chunk-63-1.png} (100%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-76-1.png => unnamed-chunk-79-1.png} (100%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-76-2.png => unnamed-chunk-79-2.png} (100%) rename book/s4ssbook_files/figure-html/{unnamed-chunk-77-1.png => unnamed-chunk-80-1.png} (100%) diff --git a/book/data.html b/book/data.html index 983b0ae2..e5276199 100644 --- a/book/data.html +++ b/book/data.html @@ -1192,33 +1192,33 @@

2.7 Working with Data in R

2.7.1 Summaries

Now that you’ve loaded some data, you can look at additional ways to summarize and interact with data elements.

-
-

2.7.1.1 table()

+
+

2.7.1.1 table() and Cross Tabulation

The base R table() function is very useful for quick summary operations. It returns a named vector with the count of each unique level of a given vector.

-

The numeric vector of “counts” is commonly combined with other functions such as sort(), order(), prop.table(), is.na() or !is.na() (is not NA) to identify abundance, proportions, or missing data (NA).

+

The numeric vector of “counts” is commonly combined with other functions such as sort(), order(), prop.table(), is.na() to identify abundance, proportions, or missing data (NA).

# load required packages
 library(aqp)
 library(soilDB)
 
 data("gopheridge", package = "soilDB")
 
-# for these examples, we use the gopheridge dataset as our "selected set"
+# for these examples, we use the gopheridge object as our "selected set"
 pedons <- gopheridge 
-# NOTE: you can use fetchNASIS to load your own data, like this:
-# pedons <- fetchNASIS()
-
-# summarize which soil taxa we have loaded
-table(pedons$taxonname)
+ +## you can use fetchNASIS to load your own data, like this: +# pedons <- fetchNASIS() + +# summarize which soil taxa we have loaded +table(pedons$taxonname)
## 
 ## Gopheridge 
 ##         52
-
# sort results in descending order
+
# sort taxonomic names in descending order
 sort(table(pedons$taxonname), decreasing = TRUE)
## Gopheridge 
 ##         52
# could do the same thing for taxonomic subgroups 
-# or any column of the SPC at the site or horizon levels
-table(pedons$taxsubgrp)
+table(pedons$taxsubgrp)
## 
 ## mollic haploxeralfs  typic haploxerepts  ultic haploxeralfs  ultic haploxerolls 
 ##                   1                   6                  44                   1
@@ -1226,6 +1226,64 @@

2.7.1.1 table()## ## ultic haploxeralfs typic haploxerepts mollic haploxeralfs ultic haploxerolls ## 44 6 1 1 +

We can convert counts in the table() result into proportions with prop.table():

+
prop.table(table(pedons$taxsubgrp))
+
## 
+## mollic haploxeralfs  typic haploxerepts  ultic haploxeralfs  ultic haploxerolls 
+##          0.01923077          0.11538462          0.84615385          0.01923077
+

table() can be used to get counts over multiple dimensions of factor levels.

+

We call this process cross tabulation.

+

For instance, let’s cross tabulate taxonomic subgroup (taxsubgrp) and the particle size family class (taxpartsize) for pedons

+
table(pedons$taxsubgrp, pedons$taxpartsize)
+
##                      
+##                       clayey-skeletal coarse-loamy fine fine-loamy loamy-skeletal
+##   mollic haploxeralfs               0            0    0          0              1
+##   typic haploxerepts                1            0    0          0              5
+##   ultic haploxeralfs                2            0    1          1             40
+##   ultic haploxerolls                0            1    0          0              0
+

As expected, in gopheridge the vast majority of pedons are Loamy-skeletal Ultic Haploxeralfs.

+

Since pedons contains site and horizon level data, when cross-tabulating we need to be sure to pair a column from the site data with other site-level columns, and the same for horizon-level columns. This is because all arguments to table() must have the same length.

+

For example, let’s cross-tabulate horizon designation (hzname) with horizon texture class (texcl).

+

We can use the addmargins() function to add the row and column sums to the margins of the table for easier interpretation when there are many rows/columns in the result.

+
addmargins(table(pedons$hzname, pedons$texcl))
+
##        
+##           c  cl   l scl sic sicl sil  sl Sum
+##   2BCt5   0   1   0   0   0    0   0   0   1
+##   2Bt1    0   0   3   0   0    0   0   0   3
+##   2Bt2    1   2   1   1   0    0   0   0   5
+##   2Bt3    1   3   1   0   0    0   0   0   5
+##   2Bt4    2   0   0   0   0    0   0   0   2
+##   2CBt    0   0   0   1   0    0   0   0   1
+##   2Cr     0   0   0   0   0    0   0   0   0
+##   2Crt    0   0   0   0   0    0   0   0   0
+##   2R      0   0   0   0   0    0   0   0   0
+##   A       0   0  33   0   0    0  12   3  48
+##   A1      0   0   2   0   0    0   2   0   4
+##   A2      0   0   2   0   0    0   2   0   4
+##   A3      0   0   1   0   0    0   0   0   1
+##   AB      0   0   2   0   0    0   2   0   4
+##   BA      0   0  17   0   0    1   0   0  18
+##   BC      0   0   4   0   0    0   0   0   4
+##   BCt     0   2   2   1   0    0   1   0   6
+##   Bt      0   0   1   1   1    0   0   0   3
+##   Bt1     0   3  28   1   0    3   5   0  40
+##   Bt2     3  15  14   1   1    3   1   0  38
+##   Bt3     4   8   6   1   0    1   1   0  21
+##   Bt4     1   2   2   0   0    0   0   0   5
+##   Bw      0   0   6   0   0    0   1   0   7
+##   Bw1     0   0   5   0   0    0   0   0   5
+##   Bw2     0   0   5   0   0    0   0   0   5
+##   Bw3     0   0   3   0   0    0   0   0   3
+##   C       1   0   1   0   0    0   0   0   2
+##   C/Brt   0   1   0   0   0    0   0   0   1
+##   CBt     0   0   0   1   0    0   0   0   1
+##   Cr      0   0   1   0   0    0   0   0   1
+##   Crt     0   0   0   0   0    0   0   0   0
+##   Ct      0   0   1   0   0    0   0   0   1
+##   Oe      0   0   0   0   0    0   0   0   0
+##   Oi      0   0   0   0   0    0   0   0   0
+##   R       0   0   0   0   0    0   0   0   0
+##   Sum    13  37 141   8   2    8  27   3 239
@@ -1269,35 +1327,37 @@

2.7.1.1 table()

2.7.2 Missing Values

-
table(pedons$taxsubgrp, useNA = "ifany")
+
table(pedons$taxsubgrp, useNA = "ifany")
## 
 ## mollic haploxeralfs  typic haploxerepts  ultic haploxeralfs  ultic haploxerolls 
 ##                   1                   6                  44                   1
-
#  is.na(...) 
-table(is.na(pedons$taxsubgrp))
+
#  is.na(...) 
+table(is.na(pedons$taxsubgrp))
## 
 ## FALSE 
 ##    52
-
#  is NOT NA !is.na(...)
-table(!is.na(pedons$taxsubgrp))
+
#  is NOT NA !is.na(...)
+table(!is.na(pedons$taxsubgrp))
## 
 ## TRUE 
 ##   52
-
# it can also be applied to horizon level columns in the SPC
-sort(table(pedons$texture), decreasing=TRUE)
+
# it can also be applied to horizon level columns in the SPC
+sort(table(pedons$texture), decreasing=TRUE)
## 
-##        BR         L      GR-L     GRV-L     CBV-L       SPM     GRX-L       SIL    GRV-CL    CBV-CL 
-##        58        36        33        24        18        14        12        12         9         8 
-##    GR-SIL     CBX-L    GRX-CL    CBX-CL   GRV-SIL        CL   GRV-SCL     GRX-C       MPM        SL 
-##         7         6         5         4         4         3         3         3         3         3 
-##      CB-L     GR-CL   GRX-SCL     PGR-C    PGRX-L      SICL    STV-CL     STV-L     STX-C     STX-L 
-##         2         2         2         2         2         2         2         2         2         2 
-##         C      CB-C     CB-CL    CB-SCL    CB-SIL   CBV-SIL   CBX-SCL      CN-L   CN-SICL     CNX-L 
-##         1         1         1         1         1         1         1         1         1         1 
-##  CNX-SICL     FLV-L      GR-C    GR-SIC  GRV-SICL   GRX-SIC   GRX-SIL  PCB-SICL PCBV-SICL     PCN-C 
-##         1         1         1         1         1         1         1         1         1         1 
-##   PCNX-CL    PGRV-C   PGRV-CL  PGRX-SCL  PGRX-SIL      ST-L     STV-C    STX-CL  STX-SICL 
-##         1         1         1         1         1         1         1         1         1
+## BR L GR-L GRV-L CBV-L SPM GRX-L SIL GRV-CL +## 58 36 33 24 18 14 12 12 9 +## CBV-CL GR-SIL CBX-L GRX-CL CBX-CL GRV-SIL CL GRV-SCL GRX-C +## 8 7 6 5 4 4 3 3 3 +## MPM SL CB-L GR-CL GRX-SCL PGR-C PGRX-L SICL STV-CL +## 3 3 2 2 2 2 2 2 2 +## STV-L STX-C STX-L C CB-C CB-CL CB-SCL CB-SIL CBV-SIL +## 2 2 2 1 1 1 1 1 1 +## CBX-SCL CN-L CN-SICL CNX-L CNX-SICL FLV-L GR-C GR-SIC GRV-SICL +## 1 1 1 1 1 1 1 1 1 +## GRX-SIC GRX-SIL PCB-SICL PCBV-SICL PCN-C PCNX-CL PGRV-C PGRV-CL PGRX-SCL +## 1 1 1 1 1 1 1 1 1 +## PGRX-SIL ST-L STV-C STX-CL STX-SICL +## 1 1 1 1 1

2.7.3 Logical Operators

@@ -1328,15 +1388,15 @@

2.7.4 Pattern Matchingregular expression (REGEX) syntax.

This process can be applied to many different columns in the SPC based on how you need to filter the data. This example pattern matches on the tax_subgroup column, but another useful application might be to pattern match on geomorphology or parent material.

Say we want to see what the variation of particle size classes are within a specific subgroup? We can use grep() to create a row index, then apply that index to the SoilProfileCollection.

-
# create a numeric index for pedons with taxsubgroup containing 'typic'
-idx <- grep('typic', pedons$taxsubgrp)
-idx
+
# create a numeric index for pedons with taxsubgroup containing 'typic'
+idx <- grep('typic', pedons$taxsubgrp)
+idx
## [1] 11 12 13 14 26 50
-
# use square bracket notation to subset 'typic' soils in `subset1` object
-subset1 <- pedons[idx,]
-
-# or use the index directly to summarize taxpartsize for 'typic' soils
-sort(table(pedons$taxpartsize[idx]), decreasing = TRUE)
+
# use square bracket notation to subset 'typic' soils in `subset1` object
+subset1 <- pedons[idx,]
+
+# or use the index directly to summarize taxpartsize for 'typic' soils
+sort(table(pedons$taxpartsize[idx]), decreasing = TRUE)
## 
 ##  loamy-skeletal clayey-skeletal 
 ##               5               1
@@ -1344,13 +1404,13 @@

2.7.4 Pattern Matching
# adjust margins
-par(mar=c(1,0,0,1))
-
-# plot the first 10 profiles of subset1
-plot(subset1[1:10, ], label = 'taxsubgrp', max.depth = 60)
-title('Pedons with the word "typic" at subgroup-level of Soil Taxonomy', line=-2)

-

+
# adjust margins
+par(mar=c(1,0,0,1))
+
+# plot the first 10 profiles of subset1
+plot(subset1[1:10, ], label = 'taxsubgrp', max.depth = 60)
+title('Pedons with the word "typic" at subgroup-level of Soil Taxonomy', line=-2)
+

For more information on using regular expressions in grep() for pattern matching operations, see: Regular-expression-syntax.

Quick check: Compare or run these commands with some code, and review the documentation, to answer the questions.

    @@ -1413,37 +1473,37 @@

    2.7.5 Filtering2.7.5.1 Filtering with aqp::subset()

    We use the SoilProfileCollection subset method, where we first specify a data (pedons) object then we can write expressions for the columns that exist in that object.

    Here, we combine two logical expressions to find taxsubgrp containing "alfs" (Alfisols) with obsdate before January 1st, 2010.

    -
    subset2 <- subset(pedons, grepl("alfs", taxsubgrp) & obs_date < as.POSIXlt("2010-01-01"))
    -
    -# check taxonomic range of particle size classes in the data
    -# overwhelmingly these are described as loamy-skeletal ultic haploxeralfs
    -sort(table(subset2$taxsubgrp), decreasing = TRUE)
    +
    subset2 <- subset(pedons, grepl("alfs", taxsubgrp) & obs_date < as.POSIXlt("2010-01-01"))
    +
    +# check taxonomic range of particle size classes in the data
    +# overwhelmingly these are described as loamy-skeletal ultic haploxeralfs
    +sort(table(subset2$taxsubgrp), decreasing = TRUE)
    ## 
     ##  ultic haploxeralfs mollic haploxeralfs 
     ##                  28                   1
    -
    sort(table(subset2$taxpartsize), decreasing = TRUE)
    +
    sort(table(subset2$taxpartsize), decreasing = TRUE)
    ## 
     ##  loamy-skeletal clayey-skeletal            fine      fine-loamy 
     ##              25               2               1               1
    -
    # check year described and taxpartsize
    -table(subset2$taxpartsize, substr(subset2$obs_date, 0, 4))
    +
    # check year described and taxpartsize
    +table(subset2$taxpartsize, substr(subset2$obs_date, 0, 4))
    ##                  
     ##                   2007 2008 2009
     ##   clayey-skeletal    1    0    1
     ##   fine               1    0    0
     ##   fine-loamy         1    0    0
     ##   loamy-skeletal    19    1    5
    -
    # a double equal sign '==' is used for exact character or numeric criteria
    -subset3 <- subset(subset2, taxpartsize == 'loamy-skeletal')
    -
    -table(subset3$taxpartsize)
    +
    # a double equal sign '==' is used for exact character or numeric criteria
    +subset3 <- subset(subset2, taxpartsize == 'loamy-skeletal')
    +
    +table(subset3$taxpartsize)
    ## 
     ## loamy-skeletal 
     ##             25
    -
    par(mar = c(0, 0, 2, 1))
    -plotSPC(subset3[1:12, ], print.id = FALSE)
    -title('Loamy-skeletal Ultic Haploxeralfs')
    -

    +
    par(mar = c(0, 0, 2, 1))
    +plotSPC(subset3[1:12, ], print.id = FALSE)
    +title('Loamy-skeletal Ultic Haploxeralfs')
    +

@@ -1453,11 +1513,11 @@

2.7.6 Dates and Times
as.POSIXct(24*60*60, origin = "1970-01-01", tz = "UTC")

+
as.POSIXct(24*60*60, origin = "1970-01-01", tz = "UTC")
## [1] "1970-01-02 UTC"

By default the timezone will match your current timezone. Dates without times are treated as being at midnight UTC.

You can customize the timezone with tz argument:

-
as.POSIXlt("01/01/1970", tz = "UTC", format = "%m/%d/%Y")
+
as.POSIXlt("01/01/1970", tz = "UTC", format = "%m/%d/%Y")
## [1] "1970-01-01 UTC"

POSIXlt and POSIXct objects can be formatted with the format() function. strptime() can be used to parse character vectors into date/times. You use as.POSIXlt() with character input, and as.POSIXct() with numeric input.

R also has the Date class which can be used for formatting calendar dates. You can convert POSIXlt/POSIXct objects to Date with as.Date()

@@ -1509,29 +1569,29 @@

2.9 fetchNASIS() dat

2.9.1 Inspecting Results

Here we inspect occurrence of andic soil properties in MT647.

We will download this “selected set” from the course website as an .rda file to save you the effort of crafting your selected set just for this example.

-
example.data.dir <- "C:/workspace2"
-example.data.path <- file.path(example.data.dir, "mt647.rda")
-
-if (!dir.exists(example.data.dir))
-  dir.create(example.data.dir, recursive = TRUE)
-
-download.file("https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/book/02/mt647.rda", 
-              destfile = example.data.path)
+
example.data.dir <- "C:/workspace2"
+example.data.path <- file.path(example.data.dir, "mt647.rda")
+
+if (!dir.exists(example.data.dir))
+  dir.create(example.data.dir, recursive = TRUE)
+
+download.file("https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/book/02/mt647.rda", 
+              destfile = example.data.path)

Downloading and installing the above .rda is equivalent to doing NASIS query “_PedonPC_Plus_DataDump_select” (MLRA04 Bozeman) to fill your selected set for User Site ID: %MT647%, NASIS Site: MLRA04%, NASIS Group: 4-MIS Pedons, followed by mt647 <- fetchNASIS().

2.9.1.1 Load Example Data

To load the sample object data into R, just use load() and the path to the .rda file (example.data.path or "C:/workspace2/mt647.rda")

-
# load the sample data
-example.data.dir <- "C:/workspace2"
-load(file.path(example.data.dir, "mt647.rda"))
-
length(mt647)
+
# load the sample data
+example.data.dir <- "C:/workspace2"
+load(file.path(example.data.dir, "mt647.rda"))
+
length(mt647)
## [1] 481
-
table(site(mt647)$andic.soil.properties, useNA = "ifany")
+
table(site(mt647)$andic.soil.properties, useNA = "ifany")
## 
 ## FALSE  TRUE  <NA> 
 ##     2    83   396
-
# get just the profiles with andic.soil.properties == TRUE
-subset(mt647, andic.soil.properties)
+
# get just the profiles with andic.soil.properties == TRUE
+subset(mt647, andic.soil.properties)
## SoilProfileCollection with 83 profiles and 446 horizons
 ## profile ID: peiid  |  horizon ID: phiid 
 ## Depth range: 20 - 305 cm
@@ -1547,13 +1607,20 @@ 

2.9.1.1 Load Example Data2.9.1.1 Load Example DataWe can compare this to what we see in the NASIS Pedon Diagnostic Features table:

Any profiles that have logic errors detected are stored in the soilDB.env bad.pedon.ids variable after you run fetchNASIS. If this variable does not exist, either you have not run fetchNASIS() in the current session or there are no errors.

-
# these are the troublesome user pedon IDs
-get('bad.pedon.ids', envir = soilDB.env)
+
# these are the troublesome user pedon IDs
+get('bad.pedon.ids', envir = soilDB.env)
##  [1] "P93-037" "P90-008" "P90-004" "P90-009" "P93-058" "P93-059" "P90-025" "P90-002" "P90-012"
 ## [10] "P92-001" "P92-085" "P91-105" "P90-019" "P75-006" "P90-010" "P90-015" "P91-094" "P92-029"
 ## [19] "P92-076" "P93-026" "P93-035" "P93-063" "P93-064" "P93-041" "P93-043" "P93-044" "P93-083"
 ## [28] "P93-112" "P93-113" "P93-124" "P93-001" "P96-007" "P91-025" "P93-078" "P92-044" "P91-112"
 ## [37] "P92-038" "P90-018" "P93-057" "P93-084" "P91-059" "P90-016" "P92-063" "P92-048" "P93-052"
 ## [46] "F01-230" "F95-420" "F95-114" "F96-205"
-
# by default, rmHzErrors removes the "bad" illogical pedons
-any(mt647$pedon_id %in% get('bad.pedon.ids', envir=soilDB.env))
+
# by default, rmHzErrors removes the "bad" illogical pedons
+any(mt647$pedon_id %in% get('bad.pedon.ids', envir=soilDB.env))
## [1] FALSE

When fetchNASIS(..., rmHzErrors = TRUE) (the default), any horizons that were omitted from the SoilProfileCollection will be stored in the bad.horizons variable in the soilDB.env environment.

-
head(get('bad.horizons', envir=soilDB.env))
+
head(get('bad.horizons', envir=soilDB.env))
##      peiid   phiid pedon_id hzname hzdept hzdepb
 ## 67  868038 4270407  F01-230     Bw     NA     NA
 ## 68  868038 4270406  F01-230      E     NA     NA
@@ -1593,10 +1660,10 @@ 

2.9.1.2 Logic Checks for the

overlaps and gaps ("overlapOrGap")

  • missing depths ("missingDepth")

  • -
    logic_tests <- checkHzDepthLogic(mt647err)
    -
    -# look at first few (look OK; valid == TRUE)
    -head(logic_tests)
    +
    logic_tests <- checkHzDepthLogic(mt647err)
    +
    +# look at first few (look OK; valid == TRUE)
    +head(logic_tests)
    ##    peiid valid depthLogic sameDepth missingDepth overlapOrGap
     ## 1 828138  TRUE      FALSE     FALSE        FALSE        FALSE
     ## 2 828139  TRUE      FALSE     FALSE        FALSE        FALSE
    @@ -1604,8 +1671,8 @@ 

    2.9.1.2 Logic Checks for the

    -
    # these all have overlapOrGap errors
    -head(logic_tests[!logic_tests$valid, ])
    +
    # these all have overlapOrGap errors
    +head(logic_tests[!logic_tests$valid, ])
    ##     peiid valid depthLogic sameDepth missingDepth overlapOrGap
     ## 11 828148 FALSE      FALSE     FALSE        FALSE         TRUE
     ## 23 828160 FALSE      FALSE     FALSE        FALSE         TRUE
    @@ -1613,11 +1680,11 @@ 

    2.9.1.2 Logic Checks for the

    -
    # join the logic test data into the site table
    -site(mt647err) <- logic_tests
    +
    # join the logic test data into the site table
    +site(mt647err) <- logic_tests

    Use the $valid vector in result to filter out profiles with missing depths (logic_tests$valid == FALSE)

    -
    bad.profiles <- subset(mt647err, !valid)
    -bad.profiles
    +
    bad.profiles <- subset(mt647err, !valid)
    +bad.profiles
    ## SoilProfileCollection with 49 profiles and 338 horizons
     ## profile ID: peiid  |  horizon ID: phiid 
     ## Depth range: 15 - 152 cm
    @@ -1633,20 +1700,27 @@ 

    2.9.1.2 Logic Checks for the

    And also filter out the valid ones (logic_tests$valid == TRUE)

    -
    good.profiles <- subset(mt647err, logic_tests$valid)
    -good.profiles
    +
    good.profiles <- subset(mt647err, logic_tests$valid)
    +good.profiles
    ## SoilProfileCollection with 481 profiles and 2536 horizons
     ## profile ID: peiid  |  horizon ID: phiid 
     ## Depth range: 14 - 1552 cm
    @@ -1662,13 +1736,20 @@ 

    2.9.1.2 Logic Checks for the 2.10.1 Elements of get_exte

    2.10.2 Load Example Data

    Below is a summary of additional information that can be readily brought into R from your NASIS selected set via the get_extended_data_from_NASIS() function.

    To download the sample 2015MT663% data from the course page with R:

    -
    example.data.dir <- "C:/workspace2"
    -example.data.path <- file.path(example.data.dir, "mt663.rda")
    -
    -if (!dir.exists(example.data.dir))
    -  dir.create(example.data.dir, recursive = TRUE)
    -
    -download.file("https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/book/02/mt663.rda", 
    -              destfile = example.data.path)
    +
    example.data.dir <- "C:/workspace2"
    +example.data.path <- file.path(example.data.dir, "mt663.rda")
    +
    +if (!dir.exists(example.data.dir))
    +  dir.create(example.data.dir, recursive = TRUE)
    +
    +download.file("https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/book/02/mt663.rda", 
    +              destfile = example.data.path)

    Before continuing, imagine opening the NASIS client, populating your selected set with 2015MT663% using a query like “NSSC Pangaea – POINT-Pedon/Site by User Pedon ID

    Load the data like we did above.

    -
    # load the sample data
    -example.data.dir <- "C:/workspace2"
    -load(file.path(example.data.dir, "mt663.rda"))
    -
    ## fetch extended site and horizon data from local NASIS
    -# mt663ext <- get_extended_data_from_NASIS_db()
    +
    # load the sample data
    +example.data.dir <- "C:/workspace2"
    +load(file.path(example.data.dir, "mt663.rda"))
    +
    ## fetch extended site and horizon data from local NASIS
    +# mt663ext <- get_extended_data_from_NASIS_db()

    We could use the get_extended_data_from_NASIS_db() function if 2015MT663% or other data were in the selected set, but we will use the mt663ext data we loaded from the .rda file.

    The column names are the names of variables that you could join to your site or horizon data by various means. Generally these variable names, with a few exceptions, mirror the NASIS 7 data model names.

    -
    # site and pedon related extended data
    -
    -# list all dataframes in the extended data
    -str(mt663ext, 1)
    +
    # site and pedon related extended data
    +
    +# list all dataframes in the extended data
    +str(mt663ext, 1)
    ## List of 14
     ##  $ ecositehistory   :'data.frame':   0 obs. of  5 variables:
     ##  $ diagnostic       :'data.frame':   292 obs. of  4 variables:
    @@ -1742,67 +1823,68 @@ 

    2.10.2 Load Example Data
    # vegetation data summary
    -colnames(mt663ext$ecositehistory) 

    +
    # vegetation data summary
    +colnames(mt663ext$ecositehistory) 
    ## [1] "siteiid"         "ecositeid"       "ecositenm"       "ecositecorrdate" "es_classifier"
    -
    # diagnostic features
    -colnames(mt663ext$diagnostic) 
    +
    # diagnostic features
    +colnames(mt663ext$diagnostic) 
    ## [1] "peiid"    "featkind" "featdept" "featdepb"
    -
    # surface rock fragments
    -colnames(mt663ext$surf_frag_summary)
    +
    # surface rock fragments
    +colnames(mt663ext$surf_frag_summary)
    ##  [1] "peiid"               "surface_fgravel"     "surface_gravel"      "surface_cobbles"    
     ##  [5] "surface_stones"      "surface_boulders"    "surface_channers"    "surface_flagstones" 
     ##  [9] "surface_paragravel"  "surface_paracobbles"
    -
    # geomorphic description
    -colnames(mt663ext$geomorph)
    +
    # geomorphic description
    +colnames(mt663ext$geomorph)
    ## [1] "peiid"          "geomicrorelief" "geommicelev"    "geomfmod"       "geomfname"     
     ## [6] "geomfeatid"     "existsonfeat"   "geomfiidref"    "geomftname"
    -
    # taxonomic history data
    -colnames(mt663ext$taxhistory)
    +
    # taxonomic history data
    +colnames(mt663ext$taxhistory)
    ##  [1] "peiid"          "classdate"      "classifier"     "classtype"      "taxonname"     
     ##  [6] "localphase"     "taxonkind"      "seriesstatus"   "taxclname"      "taxpartsize"   
     ## [11] "taxorder"       "taxsuborder"    "taxgrtgroup"    "taxsubgrp"      "soiltaxedition"
     ## [16] "osdtypelocflag" "taxmoistcl"     "taxtempregime"  "taxfamother"    "psctopdepth"   
     ## [21] "pscbotdepth"
    -
    # linked photo stored in site textnotes
    -colnames(mt663ext$photo) 
    +
    # linked photo stored in site textnotes
    +colnames(mt663ext$photo) 
    ## [1] "siteiid"   "recdate"   "textcat"   "imagepath"
    -
    # site parent materials
    -colnames(mt663ext$pm)
    +
    # site parent materials
    +colnames(mt663ext$pm)
    ##  [1] "siteiid"      "seqnum"       "pmorder"      "pmdept"       "pmdepb"       "pmmodifier"  
     ##  [7] "pmgenmod"     "pmkind"       "pmorigin"     "pmweathering"
    -
    ###
    -### horizon related extended data
    -### 
    -
    -# rock fragments 
    -colnames(mt663ext$frag_summary) 
    +
    ###
    +### horizon related extended data
    +### 
    +
    +# rock fragments 
    +colnames(mt663ext$frag_summary) 
    ##  [1] "phiid"                "fine_gravel"          "gravel"               "cobbles"             
     ##  [5] "stones"               "boulders"             "channers"             "flagstones"          
     ##  [9] "parafine_gravel"      "paragravel"           "paracobbles"          "parastones"          
     ## [13] "paraboulders"         "parachanners"         "paraflagstones"       "unspecified"         
     ## [17] "total_frags_pct_nopf" "total_frags_pct"
    -
    # soil texture modifers
    -colnames(mt663ext$texmodifier)
    +
    # soil texture modifers
    +colnames(mt663ext$texmodifier)
    ## [1] "peiid"  "phiid"  "phtiid" "seqnum" "texmod"
    -
    # soil structure data
    -colnames(mt663ext$struct) 
    -
    ## [1] "phiid"         "structgrade"   "structsize"    "structtype"    "structid"      "structpartsto"
    +
    # soil structure data
    +colnames(mt663ext$struct) 
    +
    ## [1] "phiid"         "structgrade"   "structsize"    "structtype"    "structid"     
    +## [6] "structpartsto"

    2.10.3 Visualizing Common Landforms

    The following code generates a simple graphical summary of the 10 most commonly occurring "landform_string" (a calculated field in fetchNASIS()) to inspect which are the most common.

    -
    # load data from a NASIS selected set (or sample object)
    -pedons <- mt663
    -
    -# create 'lf' object of landform factors sorted in descending order
    -lf <- sort(table(pedons$landform_string), decreasing = TRUE)
    -
    -# plot top 10 or length, whichever is shorter
    -Hmisc::dotchart2(lf[1:pmin(10, length(lf))],
    -                 col = 'black',
    -                 xlim = c(0, max(lf)),
    -                 cex.labels = 0.75)
    +
    # load data from a NASIS selected set (or sample object)
    +pedons <- mt663
    +
    +# create 'lf' object of landform factors sorted in descending order
    +lf <- sort(table(pedons$landform_string), decreasing = TRUE)
    +
    +# plot top 10 or length, whichever is shorter
    +Hmisc::dotchart2(lf[1:pmin(10, length(lf))],
    +                 col = 'black',
    +                 xlim = c(0, max(lf)),
    +                 cex.labels = 0.75)

    For a challenge and to further inspect your own data try the above code with some other summaries of geomorphic data produced by fetchNASIS().

    You can swap landform_string for: landscape_string (landscape), hillslopeprof (2D), geomposmntn, geomposhill, geompostrce, geomposflats (3D), slope_shape, shapeacross, shapedown (slope shape across/down), microfeature_string (microfeature), or geomicrorelief_string (site observation microrelief).

    @@ -1817,11 +1899,11 @@

    2.10.4.1 Boolean Diagnostic Featu

    2.10.4.2 Thickness from Diagnostic Features Table

    The following is an example of how you could use the diagnostic features (if populated!) from the extended data to determine the thickness of a diagnostic feature of interest.

    -
    # get diagnostic features associated with pedons loaded from selected set
    -d <- diagnostic_hz(mt663)
    -
    -# summary of the diagnostic features in your data!
    -unique(d$featkind)
    +
    # get diagnostic features associated with pedons loaded from selected set
    +d <- diagnostic_hz(mt663)
    +
    +# summary of the diagnostic features in your data!
    +unique(d$featkind)
    ##  [1] "ochric epipedon"          "cambic horizon"           "lithic contact"          
     ##  [4] "mollic epipedon"          "argillic horizon"         "redox concentrations"    
     ##  [7] "andic soil properties"    "secondary carbonates"     "sapric soil materials"   
    @@ -1829,34 +1911,38 @@ 

    2.10.4.2 Thickness from Diagnosti ## [13] "spodic horizon" "glossic horizon" "spodic materials" ## [16] "lithologic discontinuity" "densic materials" "umbric epipedon" ## [19] "albic materials" NA

    -
    # tabulate
    -sort(table(droplevels(factor(d$featkind))), decreasing = TRUE)
    +
    # tabulate
    +sort(table(droplevels(factor(d$featkind))), decreasing = TRUE)
    ## 
    -##          ochric epipedon           cambic horizon         argillic horizon          mollic epipedon 
    -##                       61                       54                       43                       42 
    -##    andic soil properties           lithic contact     secondary carbonates          umbric epipedon 
    -##                       30                       20                        7                        7 
    -##            albic horizon           spodic horizon          glossic horizon           reduced matrix 
    -##                        6                        4                        3                        3 
    -##    sapric soil materials lithologic discontinuity          albic materials         aquic conditions 
    -##                        3                        2                        1                        1 
    -##         densic materials     redox concentrations         spodic materials 
    -##                        1                        1                        1
    -
    # subset argillic horizons
    -d <- d[d$featkind == 'argillic horizon', ]
    -
    -# create a new column and subtract the upper from the lower depth
    -d$argillic_thickness_cm <- d$featdepb - d$featdept
    -
    -# create another new column with the upper depth to the diagnostic feature
    -d$depth_to_argillic_cm <- d$featdept
    -
    -# omit NA values
    -d <- na.omit(d)
    -
    -# subset to pedon records IDs and calculated thickness
    -d <- d[, c('peiid', 'argillic_thickness_cm', 'depth_to_argillic_cm')]
    -head(d)
    +## ochric epipedon cambic horizon argillic horizon +## 61 54 43 +## mollic epipedon andic soil properties lithic contact +## 42 30 20 +## secondary carbonates umbric epipedon albic horizon +## 7 7 6 +## spodic horizon glossic horizon reduced matrix +## 4 3 3 +## sapric soil materials lithologic discontinuity albic materials +## 3 2 1 +## aquic conditions densic materials redox concentrations +## 1 1 1 +## spodic materials +## 1
    +
    # subset argillic horizons
    +d <- d[d$featkind == 'argillic horizon', ]
    +
    +# create a new column and subtract the upper from the lower depth
    +d$argillic_thickness_cm <- d$featdepb - d$featdept
    +
    +# create another new column with the upper depth to the diagnostic feature
    +d$depth_to_argillic_cm <- d$featdept
    +
    +# omit NA values
    +d <- na.omit(d)
    +
    +# subset to pedon records IDs and calculated thickness
    +d <- d[, c('peiid', 'argillic_thickness_cm', 'depth_to_argillic_cm')]
    +head(d)
    ##      peiid argillic_thickness_cm depth_to_argillic_cm
     ## 7  1092610                    56                   30
     ## 24 1092617                    38                   34
    @@ -1864,58 +1950,58 @@ 

    2.10.4.2 Thickness from Diagnosti ## 28 1092619 38 32 ## 30 1092620 29 24 ## 33 1092621 23 19

    -
    # left-join with existing site data
    -site(mt663) <- d
    -
    -# plot as histogram
    -par(mar = c(4.5, 4.5, 1, 1))
    -
    -# note additional arguments to adjust figure labels
    -hist(
    -  mt663$argillic_thickness_cm, 
    -  xlab = 'Thickness of argillic (cm)', 
    -  main = '',
    -  las = 1
    -)
    -

    -
    hist(
    -  mt663$depth_to_argillic_cm, 
    -  xlab = 'Depth to argillic top depth (cm)', 
    -  main = '',
    -  las = 1
    -)
    -

    +
    # left-join with existing site data
    +site(mt663) <- d
    +
    +# plot as histogram
    +par(mar = c(4.5, 4.5, 1, 1))
    +
    +# note additional arguments to adjust figure labels
    +hist(
    +  mt663$argillic_thickness_cm, 
    +  xlab = 'Thickness of argillic (cm)', 
    +  main = '',
    +  las = 1
    +)
    +

    +
    hist(
    +  mt663$depth_to_argillic_cm, 
    +  xlab = 'Depth to argillic top depth (cm)', 
    +  main = '',
    +  las = 1
    +)
    +

    Quick check: What can you do with the boolean diagnostic feature data stored in the site table of a fetchNASIS SoilProfileCollection?

    2.10.4.3 Diagnostic Feature Diagrams

    -
    # work up diagnostic plot based on the mt663 dataset loaded above
    -library(aqp)
    -library(soilDB)
    -library(sharpshootR)
    -
    -# can limit which diagnostic features to show by setting 'v' manually
    -v <- c('ochric.epipedon', 'mollic.epipedon', 'andic.soil.properties', 
    -       'argillic.horizon', 'cambic.horizon', 
    -       'lithic.contact')
    -
    -# the default concatenated landform_string may have multiple levels
    -#  depending on how the geomorphic tables were populated
    -#  these are concatenated using the ampersand (&) character
    -#  so take the first string split using ampersand as a delimiter
    -mt663$first_landform <- sapply(strsplit(mt663$landform_string, "&"), 
    -                                  function(x) x[[1]])
    -
    -# plot with diagnostic features ordered according to co-occurrence
    -# v: site-level attributes to consider
    -# k: number of clusters to identify
    -diagnosticPropertyPlot(
    -  mt663[1:30, ], v = v, k = 5,
    -  grid.label = 'site_id', 
    -  dend.label = 'first_landform', 
    -  sort.vars = TRUE
    -)
    -

    +
    # work up diagnostic plot based on the mt663 dataset loaded above
    +library(aqp)
    +library(soilDB)
    +library(sharpshootR)
    +
    +# can limit which diagnostic features to show by setting 'v' manually
    +v <- c('ochric.epipedon', 'mollic.epipedon', 'andic.soil.properties', 
    +       'argillic.horizon', 'cambic.horizon', 
    +       'lithic.contact')
    +
    +# the default concatenated landform_string may have multiple levels
    +#  depending on how the geomorphic tables were populated
    +#  these are concatenated using the ampersand (&) character
    +#  so take the first string split using ampersand as a delimiter
    +mt663$first_landform <- sapply(strsplit(mt663$landform_string, "&"), 
    +                                  function(x) x[[1]])
    +
    +# plot with diagnostic features ordered according to co-occurrence
    +# v: site-level attributes to consider
    +# k: number of clusters to identify
    +diagnosticPropertyPlot(
    +  mt663[1:30, ], v = v, k = 5,
    +  grid.label = 'site_id', 
    +  dend.label = 'first_landform', 
    +  sort.vars = TRUE
    +)
    +

    @@ -1923,35 +2009,35 @@

    2.10.4.3 Diagnostic Feature Diagr

    2.11 Exercise 3: Diagnostic Horizons in Your Own Data

    Use the following script to generate a diagnostic-feature diagram for the pedon data you’ve loaded from your NASIS selected set. Alternately, you may use the MT663 data from the example above, just substitute the object mt663 for f.

    You can select a subset of desired diagnostic properties or use all diagnostic feature columns.

    -
    library(aqp)
    -library(soilDB)
    -library(sharpshootR)
    -
    -# Load data
    -f <- fetchNASIS(from = 'pedons')
    -
    -# ... May need to use subset() to reduce the number of pedons!
    -
    -# get all diagnostic feature columns from site data 
    -# by pattern matching on '[.]' in the site attribute names
    -# this is not a generic rule, but works here
    -idx <- grep('[.]', siteNames(f))
    -v <- siteNames(f)[idx]
    -
    -# inspect v
    -v
    -
    -# insert diagnostics of interest from the possible list in 'v'
    -v <- c('ochric.epipedon', 'cambic.horizon', 
    -       'argillic.horizon', 'paralithic.contact',
    -       'lithic.contact')
    -
    -# generate diagnostic property diagram
    -diagnosticPropertyPlot(
    -  f, v = v, k = 5, 
    -  grid.label = 'site_id', 
    -  dend.label = 'taxonname'
    -)
    +
    library(aqp)
    +library(soilDB)
    +library(sharpshootR)
    +
    +# Load data
    +f <- fetchNASIS(from = 'pedons')
    +
    +# ... May need to use subset() to reduce the number of pedons!
    +
    +# get all diagnostic feature columns from site data 
    +# by pattern matching on '[.]' in the site attribute names
    +# this is not a generic rule, but works here
    +idx <- grep('[.]', siteNames(f))
    +v <- siteNames(f)[idx]
    +
    +# inspect v
    +v
    +
    +# insert diagnostics of interest from the possible list in 'v'
    +v <- c('ochric.epipedon', 'cambic.horizon', 
    +       'argillic.horizon', 'paralithic.contact',
    +       'lithic.contact')
    +
    +# generate diagnostic property diagram
    +diagnosticPropertyPlot(
    +  f, v = v, k = 5, 
    +  grid.label = 'site_id', 
    +  dend.label = 'taxonname'
    +)

    For more information on generating diagnostic feature diagrams, see the following tutorial: