+
+
+
+
+
+Exercise 1
+
+
#install.packages("R.utils")
+
+library (R.utils)
+
+
Loading required package: R.oo
+
+
+
Loading required package: R.methodsS3
+
+
+
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
+
+
+
R.oo v1.26.0 (2024-01-24 05:12:50 UTC) successfully loaded. See ?R.oo for help.
+
+
+
+Attaching package: 'R.oo'
+
+
+
The following object is masked from 'package:R.methodsS3':
+
+ throw
+
+
+
The following objects are masked from 'package:methods':
+
+ getClasses, getMethods
+
+
+
The following objects are masked from 'package:base':
+
+ attach, detach, load, save
+
+
+
R.utils v2.12.3 (2023-11-18 01:00:02 UTC) successfully loaded. See ?R.utils for help.
+
+
+
+Attaching package: 'R.utils'
+
+
+
The following object is masked from 'package:utils':
+
+ timestamp
+
+
+
The following objects are masked from 'package:base':
+
+ cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings
+
+
# Download the compressed MET dataset and load it as a plain data.frame.
met_url <- "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz"
met_path <- file.path("~", "Downloads", "met_all.gz")

# download.file() has no `timeout` argument: anything passed through `...`
# is accepted but unused. The transfer timeout is controlled by the
# `timeout` option, so set it there (never lowering a larger user value).
# base:: is explicit because R.utils masks getOption().
options(timeout = max(60, base::getOption("timeout")))

download.file(
  met_url,
  destfile = met_path,
  method = "libcurl",
  mode = "wb" # the archive is binary; do not rely on extension sniffing
)

# data.table = FALSE makes fread() return a data.frame directly, which
# avoids the extra full copy that as.data.frame() would make.
met <- data.table::fread(met_path, data.table = FALSE)
+
+
+
+Exercise 2
+
+There are 2,377,343 rows and 30 columns.
+
+
+Exercise 3
+
+
+
+
'data.frame': 2377343 obs. of 30 variables:
+ $ USAFID : int 690150 690150 690150 690150 690150 690150 690150 690150 690150 690150 ...
+ $ WBAN : int 93121 93121 93121 93121 93121 93121 93121 93121 93121 93121 ...
+ $ year : int 2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
+ $ month : int 8 8 8 8 8 8 8 8 8 8 ...
+ $ day : int 1 1 1 1 1 1 1 1 1 1 ...
+ $ hour : int 0 1 2 3 4 5 6 7 8 9 ...
+ $ min : int 56 56 56 56 56 56 56 56 56 56 ...
+ $ lat : num 34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 ...
+ $ lon : num -116 -116 -116 -116 -116 ...
+ $ elev : int 696 696 696 696 696 696 696 696 696 696 ...
+ $ wind.dir : int 220 230 230 210 120 NA 320 10 320 350 ...
+ $ wind.dir.qc : chr "5" "5" "5" "5" ...
+ $ wind.type.code : chr "N" "N" "N" "N" ...
+ $ wind.sp : num 5.7 8.2 6.7 5.1 2.1 0 1.5 2.1 2.6 1.5 ...
+ $ wind.sp.qc : chr "5" "5" "5" "5" ...
+ $ ceiling.ht : int 22000 22000 22000 22000 22000 22000 22000 22000 22000 22000 ...
+ $ ceiling.ht.qc : int 5 5 5 5 5 5 5 5 5 5 ...
+ $ ceiling.ht.method: chr "9" "9" "9" "9" ...
+ $ sky.cond : chr "N" "N" "N" "N" ...
+ $ vis.dist : int 16093 16093 16093 16093 16093 16093 16093 16093 16093 16093 ...
+ $ vis.dist.qc : chr "5" "5" "5" "5" ...
+ $ vis.var : chr "N" "N" "N" "N" ...
+ $ vis.var.qc : chr "5" "5" "5" "5" ...
+ $ temp : num 37.2 35.6 34.4 33.3 32.8 31.1 29.4 28.9 27.2 26.7 ...
+ $ temp.qc : chr "5" "5" "5" "5" ...
+ $ dew.point : num 10.6 10.6 7.2 5 5 5.6 6.1 6.7 7.8 7.8 ...
+ $ dew.point.qc : chr "5" "5" "5" "5" ...
+ $ atm.press : num 1010 1010 1011 1012 1013 ...
+ $ atm.press.qc : int 5 5 5 5 5 5 5 5 5 5 ...
+ $ rh : num 19.9 21.8 18.5 16.9 17.4 ...
+
+
+Variables of interest are: year, day, hour, elev, temp, and wind.sp.
+
+
+Exercise 4
+
+
+
+
+
+
+ 1 2 3 4 5 6 7 8 9 10 11 12 13
+75975 75923 76915 76594 76332 76734 77677 77766 75366 75450 76187 75052 76906
+ 14 15 16 17 18 19 20 21 22 23 24 25 26
+77852 76217 78015 78219 79191 76709 75527 75786 78312 77413 76965 76806 79114
+ 27 28 29 30 31
+79789 77059 71712 74931 74849
+
+
+
+
+ 0 1 2 3 4 5 6 7 8 9 10
+ 99434 93482 93770 96703 110504 112128 106235 101985 100310 102915 101880
+ 11 12 13 14 15 16 17 18 19 20 21
+100470 103605 97004 96507 97635 94942 94184 100179 94604 94928 96070
+ 22 23
+ 94046 93823
+
+
+
+
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
+ -40.00 19.60 23.50 23.59 27.80 56.00 60089
+
+
+
+
Min. 1st Qu. Median Mean 3rd Qu. Max.
+ -13.0 101.0 252.0 415.8 400.0 9999.0
+
+
+
+
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
+ 0.00 0.00 2.10 2.46 3.60 36.00 79693
+
+
# Treat the 9999 elevation value as a missing-value sentinel and recode it.
# which() keeps only positions where the comparison is TRUE, so this also
# works if elev already contains NA (a logical index containing NA is
# rejected in a data-frame subscripted assignment).
met$elev[which(met$elev == 9999)] <- NA
summary(met$elev)
+
+
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
+ -13 101 252 413 400 4113 710
+
+
# Drop the implausible -40 temperature readings.
# Indexing a data.frame with a logical vector that contains NA yields one
# all-NA row per missing temp; which() keeps only the rows where the
# comparison is TRUE, so rows with missing temp are removed instead of
# being blanked out.
met <- met[which(met$temp > -40), ]

# Inspect the coldest remaining observations.
head(met[order(met$temp), ])
+
+
USAFID WBAN year month day hour min lat lon elev wind.dir
+1203053 722817 3068 2019 8 1 0 56 38.767 -104.3 1838 190
+1203055 722817 3068 2019 8 1 1 56 38.767 -104.3 1838 180
+1203128 722817 3068 2019 8 3 11 56 38.767 -104.3 1838 NA
+1203129 722817 3068 2019 8 3 12 56 38.767 -104.3 1838 NA
+1203222 722817 3068 2019 8 6 21 56 38.767 -104.3 1838 280
+1203225 722817 3068 2019 8 6 22 56 38.767 -104.3 1838 240
+ wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
+1203053 5 N 7.2 5 NA 9
+1203055 5 N 7.7 5 NA 9
+1203128 9 C 0.0 5 NA 9
+1203129 9 C 0.0 5 NA 9
+1203222 5 N 2.6 5 NA 9
+1203225 5 N 7.7 5 NA 9
+ ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
+1203053 9 N NA 9 N 5
+1203055 9 N NA 9 N 5
+1203128 9 N NA 9 N 5
+1203129 9 N NA 9 N 5
+1203222 9 N NA 9 N 5
+1203225 9 N NA 9 N 5
+ temp temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
+1203053 -17.2 5 NA 9 NA 9 NA
+1203055 -17.2 5 NA 9 NA 9 NA
+1203128 -17.2 5 NA 9 NA 9 NA
+1203129 -17.2 5 NA 9 NA 9 NA
+1203222 -17.2 5 NA 9 NA 9 NA
+1203225 -17.2 5 NA 9 NA 9 NA
+
+
+After recoding 9999 as missing, there are 710 missing values in the elevation variable. The wind speed variable has 79,693 missing values.
+
+
+Exercise 5
+The suspicious temperature value of -17.2 degrees C was recorded at latitude 38.767 and longitude -104.3, which is in Yoder, Colorado.
+A temperature reading of -17.2 degrees C in August does not seem reasonable for Yoder, Colorado.
+This location is near the USAF Academy Bullseye Auxiliary Airfield in Yoder, Colorado, which has an elevation of approximately 6,036 ft (1,840 m), consistent with the 1838 m recorded for this station. The overall range of elevations in the data (-13 m to 4113 m) also makes sense.
+
+
+Exercise 6
+
+
# Keep only the observations recorded at the highest elevation in the data.
# %in% never yields NA, so rows with missing elevation are excluded.
top_elev <- max(met$elev, na.rm = TRUE)
elev <- met[met$elev %in% top_elev, ]
summary(elev)
+
+
USAFID WBAN year month day
+ Min. :720385 Min. :419 Min. :2019 Min. :8 Min. : 1.0
+ 1st Qu.:720385 1st Qu.:419 1st Qu.:2019 1st Qu.:8 1st Qu.: 8.0
+ Median :720385 Median :419 Median :2019 Median :8 Median :16.0
+ Mean :720385 Mean :419 Mean :2019 Mean :8 Mean :16.1
+ 3rd Qu.:720385 3rd Qu.:419 3rd Qu.:2019 3rd Qu.:8 3rd Qu.:24.0
+ Max. :720385 Max. :419 Max. :2019 Max. :8 Max. :31.0
+
+ hour min lat lon elev
+ Min. : 0.00 Min. : 6.00 Min. :39.8 Min. :-105.8 Min. :4113
+ 1st Qu.: 6.00 1st Qu.:13.00 1st Qu.:39.8 1st Qu.:-105.8 1st Qu.:4113
+ Median :12.00 Median :36.00 Median :39.8 Median :-105.8 Median :4113
+ Mean :11.66 Mean :34.38 Mean :39.8 Mean :-105.8 Mean :4113
+ 3rd Qu.:18.00 3rd Qu.:53.00 3rd Qu.:39.8 3rd Qu.:-105.8 3rd Qu.:4113
+ Max. :23.00 Max. :59.00 Max. :39.8 Max. :-105.8 Max. :4113
+
+ wind.dir wind.dir.qc wind.type.code wind.sp
+ Min. : 10.0 Length:2117 Length:2117 Min. : 0.000
+ 1st Qu.:250.0 Class :character Class :character 1st Qu.: 4.100
+ Median :300.0 Mode :character Mode :character Median : 6.700
+ Mean :261.5 Mean : 7.245
+ 3rd Qu.:310.0 3rd Qu.: 9.800
+ Max. :360.0 Max. :21.100
+ NA's :237 NA's :168
+ wind.sp.qc ceiling.ht ceiling.ht.qc ceiling.ht.method
+ Length:2117 Min. : 30 Min. :5.000 Length:2117
+ Class :character 1st Qu.: 2591 1st Qu.:5.000 Class :character
+ Mode :character Median :22000 Median :5.000 Mode :character
+ Mean :15145 Mean :5.008
+ 3rd Qu.:22000 3rd Qu.:5.000
+ Max. :22000 Max. :9.000
+ NA's :4
+ sky.cond vis.dist vis.dist.qc vis.var
+ Length:2117 Min. : 0 Length:2117 Length:2117
+ Class :character 1st Qu.:16093 Class :character Class :character
+ Mode :character Median :16093 Mode :character Mode :character
+ Mean :15913
+ 3rd Qu.:16093
+ Max. :16093
+ NA's :683
+ vis.var.qc temp temp.qc dew.point
+ Length:2117 Min. : 1.00 Length:2117 Min. :-6.0000
+ Class :character 1st Qu.: 6.00 Class :character 1st Qu.: 0.0000
+ Mode :character Median : 8.00 Mode :character Median : 0.0000
+ Mean : 8.13 Mean : 0.8729
+ 3rd Qu.:10.00 3rd Qu.: 2.0000
+ Max. :15.00 Max. : 7.0000
+
+ dew.point.qc atm.press atm.press.qc rh
+ Length:2117 Min. : NA Min. :9 Min. :53.63
+ Class :character 1st Qu.: NA 1st Qu.:9 1st Qu.:58.10
+ Mode :character Median : NA Median :9 Median :61.39
+ Mean :NaN Mean :9 Mean :60.62
+ 3rd Qu.: NA 3rd Qu.:9 3rd Qu.:61.85
+ Max. : NA Max. :9 Max. :70.01
+ NA's :2117
+
+
# Pairwise correlations for the highest-elevation station, each computed
# only on rows where both variables are observed. `use` is spelled out in
# full instead of relying on partial matching of "complete".
cor(elev$temp, elev$wind.sp, use = "complete.obs")

cor(elev$temp, elev$hour, use = "complete.obs")

cor(elev$wind.sp, elev$day, use = "complete.obs")

cor(elev$wind.sp, elev$hour, use = "complete.obs")

cor(elev$temp, elev$day, use = "complete.obs")
+
+
+
+
+Exercise 7
+
+
#install.packages("leaflet")
+
+library (leaflet)
+
# Map the observations in `elev`: OpenStreetMap base tiles with a solid
# 100-unit-radius circle at each record's latitude/longitude.
station_map <- leaflet(elev)
station_map <- addProviderTiles(station_map, "OpenStreetMap")
addCircles(
  station_map,
  lat = ~lat,
  lng = ~lon,
  opacity = 1,
  fillOpacity = 1,
  radius = 100
)
+
+
+
+
+Attaching package: 'lubridate'
+
+
+
The following objects are masked from 'package:base':
+
+ date, intersect, setdiff, union
+
+
# Build an hourly timestamp for each observation from its year, month,
# day and hour columns, then check the range that was parsed.
stamp_text <- paste(elev$year, elev$month, elev$day, elev$hour, sep = " ")
elev$date <- ymd_h(stamp_text)
summary(elev$date)
+
+
Min. 1st Qu.
+"2019-08-01 00:00:00.0000" "2019-08-08 11:00:00.0000"
+ Median Mean
+"2019-08-16 22:00:00.0000" "2019-08-16 14:09:56.8823"
+ 3rd Qu. Max.
+"2019-08-24 11:00:00.0000" "2019-08-31 22:00:00.0000"
+
+
# Put the station's observations in chronological order and preview them.
chronological <- order(elev$date)
elev <- elev[chronological, ]
head(elev)
+
+
USAFID WBAN year month day hour min lat lon elev wind.dir
+221697 720385 419 2019 8 1 0 36 39.8 -105.766 4113 170
+221698 720385 419 2019 8 1 0 54 39.8 -105.766 4113 100
+221699 720385 419 2019 8 1 1 12 39.8 -105.766 4113 90
+221700 720385 419 2019 8 1 1 35 39.8 -105.766 4113 110
+221701 720385 419 2019 8 1 1 53 39.8 -105.766 4113 120
+221702 720385 419 2019 8 1 2 12 39.8 -105.766 4113 120
+ wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
+221697 5 N 8.8 5 1372 5
+221698 5 N 2.6 5 1372 5
+221699 5 N 3.1 5 1981 5
+221700 5 N 4.1 5 2134 5
+221701 5 N 4.6 5 2134 5
+221702 5 N 6.2 5 22000 5
+ ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc temp
+221697 M N NA 9 N 5 9
+221698 M N NA 9 N 5 9
+221699 M N NA 9 N 5 9
+221700 M N NA 9 N 5 9
+221701 M N NA 9 N 5 9
+221702 9 N NA 9 N 5 9
+ temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
+221697 5 1 5 NA 9 57.61039
+221698 5 1 5 NA 9 57.61039
+221699 5 2 5 NA 9 61.85243
+221700 5 2 5 NA 9 61.85243
+221701 5 2 5 NA 9 61.85243
+221702 5 2 5 NA 9 61.85243
+ date
+221697 2019-08-01 00:00:00
+221698 2019-08-01 00:00:00
+221699 2019-08-01 01:00:00
+221700 2019-08-01 01:00:00
+221701 2019-08-01 01:00:00
+221702 2019-08-01 02:00:00
+
+
# Histograms of the elevation, temperature, and wind speed variables for
# the whole dataset.
# NOTE(review): only the elevation histogram is visible here; the
# temperature and wind speed histograms named above are not -- confirm
# whether hist() calls for met$temp and met$wind.sp are missing.
# hist() takes its default title and x-axis label from the argument
# expression, so rewriting `met$elev` would change the figure labels.
+
+
+hist (met$ elev)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# Line graph of temperature vs. date for the highest-elevation station.
# The date column was created on `elev`, not `met` (met$date is NULL, so
# plotting against it fails), date belongs on the x-axis, and type = "l"
# draws a line instead of the default points.
plot(elev$date, elev$temp, type = "l")
+
+
+
+
+
+
+
+
# Line graph of wind speed vs. date for the highest-elevation station.
# Uses `elev` because that is where the date column exists (met$date is
# NULL), with date on the x-axis and type = "l" for a line.
plot(elev$date, elev$wind.sp, type = "l")
+
+
+
+
+
+
+
+
+
+
+Exercise 8
+What does "qc" mean in some of the variable names?
+
+
+