-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcamels_prepare_data_Knoben.R
76 lines (59 loc) · 2.33 KB
/
camels_prepare_data_Knoben.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
if (!require("pacman")) {
install.packages("pacman")
}
pacman::p_load(
tidyverse,
lubridate,
zeallot
)
# data --------------------------------------------------------------------
# Names of the files found in the processed-data folder.
filenames <- list.files(path = "./data/CAMELS_Knoben/processed/")

# Dates are read from a separate one-column file because the raw data only
# have a datenum column.
date <- read_csv(
  "./data/CAMELS_Knoben/date.csv",
  col_names = "date"
)

# 559 catchments used in the calibration study; duplicated ids are dropped.
selected_catchment_id <- unique(
  pull(
    read_csv("./data/CAMELS_Knoben/knoben_selected_catchment.csv"),
    catchment_id
  )
)
# read raw files ----------------------------------------------------------
# Stop early with a clear message when the folder is empty. An index loop
# over `1:length(filenames)` would iterate over c(1, 0) in that case and
# fail with an obscure subscript error.
if (length(filenames) == 0) {
  stop(
    "No files found in ./data/CAMELS_Knoben/processed/",
    call. = FALSE
  )
}

# Read one file from the processed folder.
#
# filename: file name (not a full path) inside the processed folder.
# Returns a tibble with columns P, T, PET, Q, the dates taken from the
# global `date` table, and `catchment_id` (the first run of digits in the
# file name, kept as a character string).
read_catchment_file <- function(filename) {
  read_csv(
    paste0("./data/CAMELS_Knoben/processed/", filename),
    col_names = c("date", "P", "T", "PET", "Q")
  ) %>%
    # the first column of each file is discarded and replaced by the dates
    # read from date.csv; bind_cols() matches rows by position
    select(-date) %>%
    bind_cols(date) %>%
    mutate(catchment_id = str_extract(filename, "[0-9]+"))
}

# One tibble per file, stacked in file-name order, id and date columns first.
data_raw <- filenames %>%
  map(read_catchment_file) %>%
  bind_rows() %>%
  select(catchment_id, date, everything())
# process data ------------------------------------------------------------
# Two steps in one chain:
#   1. keep only the catchments used in the Knoben et al. (2020) study;
#   2. recode missing Q, which is marked by negative values, as NA.
data_process <- data_raw %>%
  filter(catchment_id %in% selected_catchment_id) %>%
  mutate(Q = replace(Q, which(Q < 0), NA_real_))
# write csv files ---------------------------------------------------------
# Drop the catchment_id and date columns from `data` and write the remaining
# columns to the csv file at `path`.
write_model_input <- function(data, path) {
  model_columns <- select(data, -catchment_id, -date)
  invisible(write_csv(model_columns, file = path))
}

# all catchments, full record; record length = 7670 for each catchment
write_model_input(data_process, "./data/camels_all.csv")

# Split and write the subsets. Note there is a one-year warm-up period for
# each subset.

# train + val; record length = 3652 for each catchment
data_process %>%
  filter(date <= ymd("1998-12-31")) %>%
  write_model_input("./data/camels_train_val.csv")

# train; record length = 2922 for each catchment
data_process %>%
  filter(date <= ymd("1996-12-31")) %>%
  write_model_input("./data/camels_train.csv")

# val; record length = 1095 for each catchment
data_process %>%
  filter(date >= ymd("1996-01-02"), date <= ymd("1998-12-31")) %>%
  write_model_input("./data/camels_val.csv")

# test; record length = 4383 for each catchment
data_process %>%
  filter(date >= ymd("1998-01-01")) %>%
  write_model_input("./data/camels_test.csv")