This repository has been archived by the owner on May 14, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathget_ag_bulletin.R
222 lines (217 loc) · 6.88 KB
/
get_ag_bulletin.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#' Get BOM agriculture bulletin information for select stations
#'
#' Fetch the \acronym{BOM} agricultural bulletin information and return it in a
#' data frame
#'
#' @param state Australian state or territory as full name or postal code.
#' Fuzzy string matching via \code{\link[base]{agrep}} is done. Defaults to
#' \dQuote{AUS} returning all state bulletins, see Details for more.
#'
#' @details Allowed state and territory postal codes, only one state per request
#' or all using \code{AUS}.
#' \describe{
#' \item{ACT}{Australian Capital Territory (will return NSW)}
#' \item{NSW}{New South Wales}
#' \item{NT}{Northern Territory}
#' \item{QLD}{Queensland}
#' \item{SA}{South Australia}
#' \item{TAS}{Tasmania}
#' \item{VIC}{Victoria}
#' \item{WA}{Western Australia}
#' \item{AUS}{Australia, returns forecast for all states, NT and ACT}
#' }
#'
#' @return
#' A data frame as a \code{\link[data.table]{data.table}} object of Australia
#' \acronym{BOM} agricultural bulletin information. For full details of fields
#' and units returned see Appendix 3 in the \CRANpkg{bomrang} vignette, use \cr
#' \code{vignette("bomrang", package = "bomrang")} to view.
#'
#' @examples
#' \donttest{
#' ag_bulletin <- get_ag_bulletin(state = "QLD")
#' ag_bulletin
#' }
#'
#' @references
#' Agricultural observations are retrieved from the Australian Bureau of
#' Meteorology (\acronym{BOM}) Weather Data Services Agriculture Bulletins, \cr
#' \url{http://www.bom.gov.au/catalogue/observations/about-agricultural.shtml}
#'
#' and
#'
#' Australian Bureau of Meteorology (\acronym{BOM})) Weather Data Services
#' Observation of Rainfall, \cr
#' \url{http://www.bom.gov.au/climate/how/observations/rain-measure.shtml}
#'
#' Station location and other metadata are sourced from the Australian Bureau of
#' Meteorology (\acronym{BOM}) webpage, Bureau of Meteorology Site Numbers: \cr
#' \url{http://www.bom.gov.au/climate/cdo/about/site-num.shtml}
#'
#' @author Adam H. Sparks, \email{adamhsparks@@gmail.com} and Paul Melloy
#' \email{paul@@melloy.com.au}
#'
#' @seealso \link{parse_ag_bulletin}
#'
#' @export get_ag_bulletin
get_ag_bulletin <- function(state = "AUS") {
# this is just a placeholder for functionality with parse_ag_bulletin()
filepath <- NULL
# see internal_functions.R for these functions
the_state <- .check_states(state)
location <- .validate_filepath(filepath)
bulletin_out <-
.return_bulletin(file_loc = location, cleaned_state = the_state)
return(bulletin_out)
}
# Ag bulletin functions for get() and parse() ----------------------------------
.return_bulletin <- function(file_loc, cleaned_state) {
# create vector of XML files
AUS_XML <- c(
"IDN65176.xml",
# NSW
"IDD65176.xml",
# NT
"IDQ60604.xml",
# QLD
"IDS65176.xml",
# SA
"IDT65176.xml",
# TAS
"IDV65176.xml",
# VIC
"IDW65176.xml" # WA
)
if (cleaned_state != "AUS") {
xml_url <- .create_bom_file(AUS_XML,
.the_state = cleaned_state,
.file_loc = file_loc)
bulletin_out <- .parse_bulletin(xml_url)
if (is.null(bulletin_out)) {
return(invisible(NULL))
}
return(bulletin_out[])
} else {
file_list <- paste0(file_loc, "/", AUS_XML)
bulletin_out <-
lapply(X = file_list, FUN = .parse_bulletin)
bulletin_out <- data.table::rbindlist(bulletin_out, fill = TRUE)
return(bulletin_out[])
}
}
#' @noRd
.parse_bulletin <- function(xml_url) {
# CRAN NOTE avoidance
stations_site_list <-
site <- obs_time_local <- obs_time_utc <- r <- .SD <- NULL # nocov
# load the XML from ftp
if (substr(xml_url, 1, 3) == "ftp") {
xml_object <- .get_url(remote_file = xml_url)
if (is.null(xml_object)) {
return(invisible(NULL))
}
} else {# load the XML from local
xml_object <- xml2::read_xml(xml_url)
}
# get definitions (and all possible value fields to check against)
definition_attrs <- xml2::xml_find_all(xml_object, "//data-def")
definition_attrs <- xml2::xml_attrs(definition_attrs)
definition_attrs <-
lapply(definition_attrs, function(x)
x[[1]][[1]])
# get the actual observations and create a data table
observations <- xml2::xml_find_all(xml_object, ".//d")
out <- data.table::data.table(
obs_time_local = xml2::xml_find_first(observations, ".//ancestor::obs") %>%
xml2::xml_attr("obs-time-local"),
obs_time_utc = xml2::xml_find_first(observations, ".//ancestor::obs") %>%
xml2::xml_attr("obs-time-utc"),
time_zone = xml2::xml_find_first(observations, ".//ancestor::obs") %>%
xml2::xml_attr("time-zone"),
site = xml2::xml_find_first(observations, ".//ancestor::obs") %>%
xml2::xml_attr("site"),
station = xml2::xml_find_first(observations, ".//ancestor::obs") %>%
xml2::xml_attr("station"),
observation = observations %>% xml2::xml_attr("t"),
values = observations %>% xml2::xml_text("t"),
product_id = substr(basename(xml_url),
1,
nchar(basename(xml_url)) - 4)
)
out <- data.table::dcast(
out,
product_id + obs_time_local + obs_time_utc + time_zone + site + station ~
observation,
value.var = "values"
)
# check that all fields are present, if not add missing col with NAs
missing <-
setdiff(unlist(definition_attrs), names(out[, -c(1:5)]))
if (length(missing) != 0) {
out[, eval(missing) := NA]
}
# remove leading 0 to merge with stations_site_list
out[, site := gsub("^0{1,2}", "", out$site)]
# merge with AAC codes
# load AAC code/town name list to join with final output
load(system.file("extdata", "stations_site_list.rda", # nocov
package = "bomrang")) # nocov
data.table::setDT(stations_site_list)
data.table::setkey(stations_site_list, "site")
data.table::setkey(out, "site")
out <- stations_site_list[out, on = "site"]
# tidy up the cols
refcols <- c(
"product_id",
"state",
"dist",
"name",
"wmo",
"site",
"station",
"obs_time_local",
"obs_time_utc",
"time_zone",
"lat",
"lon",
"elev",
"bar_ht",
"start",
"end",
"r",
"tn",
"tx",
"twd",
"ev",
"tg",
"sn",
"solr",
"t5",
"t10",
"t20",
"t50",
"t1m",
"wr"
)
# set col classes
# factor
out[, c(1:3, 11:12) := lapply(.SD, function(x)
as.factor(x)),
.SDcols = c(1:3, 11:12)]
# dates
out[, obs_time_local := gsub("T", " ", obs_time_local)]
out[, obs_time_utc := gsub("T", " ", obs_time_utc)]
out[, c(13:14) := lapply(.SD, function(x)
as.POSIXct(x,
origin = "1970-1-1",
format = "%Y%m%d %H%M")),
.SDcols = c(13:14)]
# set "Tce" to 0.01
out[, r := gsub("Tce", "0.01", r)]
# set numeric cols
out[, c(4:7, 9:10, 17:30) := lapply(.SD, as.numeric),
.SDcols = c(4:7, 9:10, 17:30)]
data.table::setcolorder(out, refcols)
# return from main function
return(out)
}