diff --git a/lab 3.html b/lab 3.html new file mode 100644 index 0000000..b59cd04 --- /dev/null +++ b/lab 3.html @@ -0,0 +1,958 @@ + + + + + + + + + + +PM 566 Lab 3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

PM 566 Lab 3

+
+ + + +
+ +
+
Author
+
+

Erica Shin

+
+
+ + + +
+ + + +
+ + +
+

Exercise 1

+
+
#install.packages("R.utils")
+
+library(R.utils)
+
+
Loading required package: R.oo
+
+
+
Loading required package: R.methodsS3
+
+
+
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
+
+
+
R.oo v1.26.0 (2024-01-24 05:12:50 UTC) successfully loaded. See ?R.oo for help.
+
+
+

+Attaching package: 'R.oo'
+
+
+
The following object is masked from 'package:R.methodsS3':
+
+    throw
+
+
+
The following objects are masked from 'package:methods':
+
+    getClasses, getMethods
+
+
+
The following objects are masked from 'package:base':
+
+    attach, detach, load, save
+
+
+
R.utils v2.12.3 (2023-11-18 01:00:02 UTC) successfully loaded. See ?R.utils for help.
+
+
+

+Attaching package: 'R.utils'
+
+
+
The following object is masked from 'package:utils':
+
+    timestamp
+
+
+
The following objects are masked from 'package:base':
+
+    cat, commandArgs, getOption, isOpen, nullfile, parse, use, warnings
+
+
download.file(
+  "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
+  destfile = file.path("~", "Downloads", "met_all.gz"),
+  method   = "libcurl",
+  timeout  = 60
+)
+
+met <- data.table::fread(file.path("~", "Downloads", "met_all.gz"))
+met <- as.data.frame(met)
+
+
+
+

Exercise 2

+
+
dim(met)
+
+
[1] 2377343      30
+
+
nrow(met)
+
+
[1] 2377343
+
+
ncol(met)
+
+
[1] 30
+
+
+

There are 2,377,343 rows and 30 columns.

+
+
+

Exercise 3

+
+
str(met)
+
+
'data.frame':   2377343 obs. of  30 variables:
+ $ USAFID           : int  690150 690150 690150 690150 690150 690150 690150 690150 690150 690150 ...
+ $ WBAN             : int  93121 93121 93121 93121 93121 93121 93121 93121 93121 93121 ...
+ $ year             : int  2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
+ $ month            : int  8 8 8 8 8 8 8 8 8 8 ...
+ $ day              : int  1 1 1 1 1 1 1 1 1 1 ...
+ $ hour             : int  0 1 2 3 4 5 6 7 8 9 ...
+ $ min              : int  56 56 56 56 56 56 56 56 56 56 ...
+ $ lat              : num  34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 34.3 ...
+ $ lon              : num  -116 -116 -116 -116 -116 ...
+ $ elev             : int  696 696 696 696 696 696 696 696 696 696 ...
+ $ wind.dir         : int  220 230 230 210 120 NA 320 10 320 350 ...
+ $ wind.dir.qc      : chr  "5" "5" "5" "5" ...
+ $ wind.type.code   : chr  "N" "N" "N" "N" ...
+ $ wind.sp          : num  5.7 8.2 6.7 5.1 2.1 0 1.5 2.1 2.6 1.5 ...
+ $ wind.sp.qc       : chr  "5" "5" "5" "5" ...
+ $ ceiling.ht       : int  22000 22000 22000 22000 22000 22000 22000 22000 22000 22000 ...
+ $ ceiling.ht.qc    : int  5 5 5 5 5 5 5 5 5 5 ...
+ $ ceiling.ht.method: chr  "9" "9" "9" "9" ...
+ $ sky.cond         : chr  "N" "N" "N" "N" ...
+ $ vis.dist         : int  16093 16093 16093 16093 16093 16093 16093 16093 16093 16093 ...
+ $ vis.dist.qc      : chr  "5" "5" "5" "5" ...
+ $ vis.var          : chr  "N" "N" "N" "N" ...
+ $ vis.var.qc       : chr  "5" "5" "5" "5" ...
+ $ temp             : num  37.2 35.6 34.4 33.3 32.8 31.1 29.4 28.9 27.2 26.7 ...
+ $ temp.qc          : chr  "5" "5" "5" "5" ...
+ $ dew.point        : num  10.6 10.6 7.2 5 5 5.6 6.1 6.7 7.8 7.8 ...
+ $ dew.point.qc     : chr  "5" "5" "5" "5" ...
+ $ atm.press        : num  1010 1010 1011 1012 1013 ...
+ $ atm.press.qc     : int  5 5 5 5 5 5 5 5 5 5 ...
+ $ rh               : num  19.9 21.8 18.5 16.9 17.4 ...
+
+
+

Variables of interest are: year, day, hour, elev, temp, and wind.sp.

+
+
+

Exercise 4

+
+
table(met$year)
+
+

+   2019 
+2377343 
+
+
table(met$day)
+
+

+    1     2     3     4     5     6     7     8     9    10    11    12    13 
+75975 75923 76915 76594 76332 76734 77677 77766 75366 75450 76187 75052 76906 
+   14    15    16    17    18    19    20    21    22    23    24    25    26 
+77852 76217 78015 78219 79191 76709 75527 75786 78312 77413 76965 76806 79114 
+   27    28    29    30    31 
+79789 77059 71712 74931 74849 
+
+
table(met$hour)
+
+

+     0      1      2      3      4      5      6      7      8      9     10 
+ 99434  93482  93770  96703 110504 112128 106235 101985 100310 102915 101880 
+    11     12     13     14     15     16     17     18     19     20     21 
+100470 103605  97004  96507  97635  94942  94184 100179  94604  94928  96070 
+    22     23 
+ 94046  93823 
+
+
summary(met$temp)
+
+
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
+ -40.00   19.60   23.50   23.59   27.80   56.00   60089 
+
+
summary(met$elev)
+
+
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
+  -13.0   101.0   252.0   415.8   400.0  9999.0 
+
+
summary(met$wind.sp)
+
+
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
+   0.00    0.00    2.10    2.46    3.60   36.00   79693 
+
+
met[met$elev==9999.0, "elev"] <- NA
+summary(met$elev)
+
+
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
+    -13     101     252     413     400    4113     710 
+
+
met <- met[met$temp > -40, ]
+head(met[order(met$temp), ])
+
+
        USAFID WBAN year month day hour min    lat    lon elev wind.dir
+1203053 722817 3068 2019     8   1    0  56 38.767 -104.3 1838      190
+1203055 722817 3068 2019     8   1    1  56 38.767 -104.3 1838      180
+1203128 722817 3068 2019     8   3   11  56 38.767 -104.3 1838       NA
+1203129 722817 3068 2019     8   3   12  56 38.767 -104.3 1838       NA
+1203222 722817 3068 2019     8   6   21  56 38.767 -104.3 1838      280
+1203225 722817 3068 2019     8   6   22  56 38.767 -104.3 1838      240
+        wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
+1203053           5              N     7.2          5         NA             9
+1203055           5              N     7.7          5         NA             9
+1203128           9              C     0.0          5         NA             9
+1203129           9              C     0.0          5         NA             9
+1203222           5              N     2.6          5         NA             9
+1203225           5              N     7.7          5         NA             9
+        ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
+1203053                 9        N       NA           9       N          5
+1203055                 9        N       NA           9       N          5
+1203128                 9        N       NA           9       N          5
+1203129                 9        N       NA           9       N          5
+1203222                 9        N       NA           9       N          5
+1203225                 9        N       NA           9       N          5
+         temp temp.qc dew.point dew.point.qc atm.press atm.press.qc rh
+1203053 -17.2       5        NA            9        NA            9 NA
+1203055 -17.2       5        NA            9        NA            9 NA
+1203128 -17.2       5        NA            9        NA            9 NA
+1203129 -17.2       5        NA            9        NA            9 NA
+1203222 -17.2       5        NA            9        NA            9 NA
+1203225 -17.2       5        NA            9        NA            9 NA
+
+
+

There are 710 missing values in the wind speed variable.

+
+
+

Exercise 5

+

The suspicious temperature value of -17.2C has a location with a latitude of 38.767 and a longitude of -104.3. This is located in Yoder, Colorado.

+

It does not seem reasonable that Yoder, Colorado would have a temperature reading of -17.2C in August.

+

This location is near the USAF Academy Bullseye Auxiliary Airfield in Yoder, Colorado, which has an elevation of approximately 6,036 ft (1,840 m). Therefore, the range of elevations make sense (-13m to 4113m).

+
+
+

Exercise 6

+
+
elev <- met[which(met$elev == max(met$elev, na.rm = TRUE)), ]
+summary(elev)
+
+
     USAFID            WBAN          year          month        day      
+ Min.   :720385   Min.   :419   Min.   :2019   Min.   :8   Min.   : 1.0  
+ 1st Qu.:720385   1st Qu.:419   1st Qu.:2019   1st Qu.:8   1st Qu.: 8.0  
+ Median :720385   Median :419   Median :2019   Median :8   Median :16.0  
+ Mean   :720385   Mean   :419   Mean   :2019   Mean   :8   Mean   :16.1  
+ 3rd Qu.:720385   3rd Qu.:419   3rd Qu.:2019   3rd Qu.:8   3rd Qu.:24.0  
+ Max.   :720385   Max.   :419   Max.   :2019   Max.   :8   Max.   :31.0  
+                                                                         
+      hour            min             lat            lon              elev     
+ Min.   : 0.00   Min.   : 6.00   Min.   :39.8   Min.   :-105.8   Min.   :4113  
+ 1st Qu.: 6.00   1st Qu.:13.00   1st Qu.:39.8   1st Qu.:-105.8   1st Qu.:4113  
+ Median :12.00   Median :36.00   Median :39.8   Median :-105.8   Median :4113  
+ Mean   :11.66   Mean   :34.38   Mean   :39.8   Mean   :-105.8   Mean   :4113  
+ 3rd Qu.:18.00   3rd Qu.:53.00   3rd Qu.:39.8   3rd Qu.:-105.8   3rd Qu.:4113  
+ Max.   :23.00   Max.   :59.00   Max.   :39.8   Max.   :-105.8   Max.   :4113  
+                                                                               
+    wind.dir     wind.dir.qc        wind.type.code        wind.sp      
+ Min.   : 10.0   Length:2117        Length:2117        Min.   : 0.000  
+ 1st Qu.:250.0   Class :character   Class :character   1st Qu.: 4.100  
+ Median :300.0   Mode  :character   Mode  :character   Median : 6.700  
+ Mean   :261.5                                         Mean   : 7.245  
+ 3rd Qu.:310.0                                         3rd Qu.: 9.800  
+ Max.   :360.0                                         Max.   :21.100  
+ NA's   :237                                           NA's   :168     
+  wind.sp.qc          ceiling.ht    ceiling.ht.qc   ceiling.ht.method 
+ Length:2117        Min.   :   30   Min.   :5.000   Length:2117       
+ Class :character   1st Qu.: 2591   1st Qu.:5.000   Class :character  
+ Mode  :character   Median :22000   Median :5.000   Mode  :character  
+                    Mean   :15145   Mean   :5.008                     
+                    3rd Qu.:22000   3rd Qu.:5.000                     
+                    Max.   :22000   Max.   :9.000                     
+                    NA's   :4                                         
+   sky.cond            vis.dist     vis.dist.qc          vis.var         
+ Length:2117        Min.   :    0   Length:2117        Length:2117       
+ Class :character   1st Qu.:16093   Class :character   Class :character  
+ Mode  :character   Median :16093   Mode  :character   Mode  :character  
+                    Mean   :15913                                        
+                    3rd Qu.:16093                                        
+                    Max.   :16093                                        
+                    NA's   :683                                          
+  vis.var.qc             temp         temp.qc            dew.point      
+ Length:2117        Min.   : 1.00   Length:2117        Min.   :-6.0000  
+ Class :character   1st Qu.: 6.00   Class :character   1st Qu.: 0.0000  
+ Mode  :character   Median : 8.00   Mode  :character   Median : 0.0000  
+                    Mean   : 8.13                      Mean   : 0.8729  
+                    3rd Qu.:10.00                      3rd Qu.: 2.0000  
+                    Max.   :15.00                      Max.   : 7.0000  
+                                                                        
+ dew.point.qc         atm.press     atm.press.qc       rh       
+ Length:2117        Min.   : NA    Min.   :9     Min.   :53.63  
+ Class :character   1st Qu.: NA    1st Qu.:9     1st Qu.:58.10  
+ Mode  :character   Median : NA    Median :9     Median :61.39  
+                    Mean   :NaN    Mean   :9     Mean   :60.62  
+                    3rd Qu.: NA    3rd Qu.:9     3rd Qu.:61.85  
+                    Max.   : NA    Max.   :9     Max.   :70.01  
+                    NA's   :2117                                
+
+
cor(elev$temp, elev$wind.sp, use="complete")
+
+
[1] -0.09373843
+
+
cor(elev$temp, elev$hour, use="complete")
+
+
[1] 0.4397261
+
+
cor(elev$wind.sp, elev$day, use="complete")
+
+
[1] 0.3643079
+
+
cor(elev$wind.sp, elev$hour, use="complete")
+
+
[1] 0.08807315
+
+
cor(elev$temp, elev$day, use="complete")
+
+
[1] -0.003857766
+
+
+
+
+

Exercise 7

+
+
#install.packages("leaflet")
+
+library(leaflet)
+
+leaflet(elev) %>%
+  addProviderTiles('OpenStreetMap') %>% 
+  addCircles(lat=~lat,lng=~lon, opacity=1, fillOpacity=1, radius=100)
+
+
+ +
+
library(lubridate)
+
+

+Attaching package: 'lubridate'
+
+
+
The following objects are masked from 'package:base':
+
+    date, intersect, setdiff, union
+
+
elev$date <- with(elev, ymd_h(paste(year, month, day, hour, sep= ' ')))
+summary(elev$date)
+
+
                      Min.                    1st Qu. 
+"2019-08-01 00:00:00.0000" "2019-08-08 11:00:00.0000" 
+                    Median                       Mean 
+"2019-08-16 22:00:00.0000" "2019-08-16 14:09:56.8823" 
+                   3rd Qu.                       Max. 
+"2019-08-24 11:00:00.0000" "2019-08-31 22:00:00.0000" 
+
+
elev <- elev[order(elev$date), ]
+head(elev)
+
+
       USAFID WBAN year month day hour min  lat      lon elev wind.dir
+221697 720385  419 2019     8   1    0  36 39.8 -105.766 4113      170
+221698 720385  419 2019     8   1    0  54 39.8 -105.766 4113      100
+221699 720385  419 2019     8   1    1  12 39.8 -105.766 4113       90
+221700 720385  419 2019     8   1    1  35 39.8 -105.766 4113      110
+221701 720385  419 2019     8   1    1  53 39.8 -105.766 4113      120
+221702 720385  419 2019     8   1    2  12 39.8 -105.766 4113      120
+       wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
+221697           5              N     8.8          5       1372             5
+221698           5              N     2.6          5       1372             5
+221699           5              N     3.1          5       1981             5
+221700           5              N     4.1          5       2134             5
+221701           5              N     4.6          5       2134             5
+221702           5              N     6.2          5      22000             5
+       ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc temp
+221697                 M        N       NA           9       N          5    9
+221698                 M        N       NA           9       N          5    9
+221699                 M        N       NA           9       N          5    9
+221700                 M        N       NA           9       N          5    9
+221701                 M        N       NA           9       N          5    9
+221702                 9        N       NA           9       N          5    9
+       temp.qc dew.point dew.point.qc atm.press atm.press.qc       rh
+221697       5         1            5        NA            9 57.61039
+221698       5         1            5        NA            9 57.61039
+221699       5         2            5        NA            9 61.85243
+221700       5         2            5        NA            9 61.85243
+221701       5         2            5        NA            9 61.85243
+221702       5         2            5        NA            9 61.85243
+                      date
+221697 2019-08-01 00:00:00
+221698 2019-08-01 00:00:00
+221699 2019-08-01 01:00:00
+221700 2019-08-01 01:00:00
+221701 2019-08-01 01:00:00
+221702 2019-08-01 02:00:00
+
+
#histograms of the elevation, temperature, and wind speed variables for the whole dataset
+
+
+hist(met$elev)
+
+
+
+

+
+
+
+
hist(met$temp)
+
+
+
+

+
+
+
+
hist(met$wind.sp)
+
+
+
+

+
+
+
+
#line graphs of temperature vs. date and wind speed vs. date
+
+plot(met$temp, met$date)
+
+
+
+

+
+
+
+
plot(met$wind.sp, met$date)
+
+
+
+

+
+
+
+
+
+
+

Exercise 8

+

What does “qc” mean for some of the data variable names?

+
+ +
+ + +
+ + + + + \ No newline at end of file