# camels_de_processing.R
if (!require("pacman")) {
install.packages("pacman")
}
pacman::p_load(
tidyverse,
lubridate,
zeallot,
fs
)
# Read data ---------------------------------------------------------------

# Root folder of the local CAMELS-DE (version 0.1.0) download. Every input
# path below is derived from it, so the location only has to be changed here.
camels_de_dir <- "D:/data/CAMELS DE/Version 0.1.0"

# Read the observed and simulated series of a single catchment.
#
# Arguments:
#   timeseries_file: path to one CSV file of the `timeseries` folder.
#   root_dir:        CAMELS-DE root folder holding `timeseries_simulated`.
#
# Returns a one-row tibble with `catchment_name` and a list-column `data`
# that holds the columns date, P, T, PET and Q of that catchment.
read_catchment <- function(timeseries_file, root_dir = camels_de_dir) {
  # The catchment name is the part of the file name after the last
  # underscore, without the file extension.
  name_parts <- str_split(
    tools::file_path_sans_ext(basename(timeseries_file)),
    "_"
  )[[1]]
  catchment_name <- name_parts[length(name_parts)]

  timeseries <- read_csv(timeseries_file, show_col_types = FALSE) %>%
    select(
      date,
      Q = discharge_spec,
      P = precipitation_mean,
      T = temperature_mean
    )

  # PET is taken from the matching file of the simulated time series.
  timeseries_simulated_file <- file.path(
    root_dir,
    "timeseries_simulated",
    paste0("CAMELS_DE_discharge_sim_", catchment_name, ".csv")
  )
  timeseries_simulated <-
    read_csv(timeseries_simulated_file, show_col_types = FALSE) %>%
    select(date, PET = pet_hargreaves)

  tibble(
    catchment_name = catchment_name,
    data = list(
      timeseries %>%
        left_join(timeseries_simulated, by = join_by(date)) %>%
        select(date, P, T, PET, Q)
    )
  )
}

# Only CSV files are read; anything else in the folder is ignored.
timeseries_files <- dir_ls(
  file.path(camels_de_dir, "timeseries"),
  glob = "*.csv"
)

# One row per catchment, with the time series nested in the `data` column.
data <- map(as.character(timeseries_files), read_catchment) %>%
  bind_rows()

# Make sure the output folder exists before writing to it.
dir_create("./data")
save(data, file = "./data/camels_de.Rda")
# Split data --------------------------------------------------------------
gc()

# Flatten the nested per-catchment series into one long table and keep only
# the study window. The modelling study uses 1989-01-01 to 2009-12-31; the
# records starting at 1988-01-02 are retained as well because they are
# needed for warm-up.
data_process <- data %>%
  unnest(data) %>%
  rename(Date = date) %>%
  filter(between(Date, ymd("1988-01-02"), ymd("2009-12-31")))
# Calibration period: 1989-01-01 to 1998-12-31.
# Test period:        1999-01-01 to 2009-12-31.
# Per the original author's note, all forcing data is available but some of
# the Q data is missing. Catchments with too few Q records in either period
# are collected in `incomplete_catchments`.

# At least 2 years of Q data should be available in each period.
minimal_required_Q_length <- 365 * 2

# List the catchments with too few non-missing Q values in a period.
#
# Arguments:
#   df:           long table with columns `catchment_name`, `Date` and `Q`.
#   period_start: first day of the period (inclusive).
#   period_end:   last day of the period (inclusive).
#   min_records:  minimum number of non-missing Q values required.
#
# Returns the names of the catchments that have fewer than `min_records`
# non-missing Q values between `period_start` and `period_end`.
find_incomplete_catchments <- function(df,
                                       period_start,
                                       period_end,
                                       min_records = minimal_required_Q_length) {
  df %>%
    group_by(catchment_name) %>%
    summarise(
      # Counting inside summarise() (instead of filtering the rows first)
      # keeps catchments with no rows at all in the period, so they are
      # reported with a count of 0 rather than silently skipped.
      n_complete_record = sum(
        !is.na(Q) & Date >= period_start & Date <= period_end,
        na.rm = TRUE
      )
    ) %>%
    filter(n_complete_record < min_records) %>%
    pull(catchment_name)
}

incomplete_catchment_calibration <- find_incomplete_catchments(
  data_process,
  period_start = ymd("1989-01-01"),
  period_end = ymd("1998-12-31")
)

incomplete_catchment_test <- find_incomplete_catchments(
  data_process,
  period_start = ymd("1999-01-01"),
  period_end = ymd("2009-12-31")
)

# A catchment is dropped if it fails the requirement in either period.
incomplete_catchments <- unique(
  c(incomplete_catchment_calibration, incomplete_catchment_test)
)
# Drop every catchment flagged in `incomplete_catchments`.
data_process <- data_process %>%
  filter(!catchment_name %in% incomplete_catchments)

# Number of catchments that remain (1347 according to the original note).
n_distinct(data_process$catchment_name)
# Split the data ----------------------------------------------------------

# Calibration set: Q from 1989-01-01 to 1998-12-31 is used for calibration;
# the rows from 1988-01-02 onwards are included as warm-up.
data_calibration <- data_process %>%
  filter(between(Date, ymd("1988-01-02"), ymd("1998-12-31")))

# Rows per catchment; a single value of 4017 is expected.
unique(count(data_calibration, catchment_name)$n)

# Test set: Q from 1999-01-01 is used for testing; the rows from 1998-01-01
# onwards are included as warm-up.
data_test <- data_process %>%
  filter(Date >= ymd("1998-01-01"))

# Rows per catchment; a single value of 4383 is expected.
unique(count(data_test, catchment_name)$n)

# Date ranges; in both sets the first year is for warm-up only.
range(data_calibration$Date) # "1988-01-02" to "1998-12-31"
range(data_test$Date) # "1998-01-01" to "2009-12-31"
# save data ---------------------------------------------------------------

# Rows are ordered by catchment and then by date. Only the columns from P to
# Q are written, so `catchment_name` and `Date` are not part of the files.
calibration_out <- select(
  arrange(data_calibration, catchment_name, Date),
  P:Q
)
write_csv(calibration_out, file = "./data/data_calibration_camels_de.csv")

test_out <- select(
  arrange(data_test, catchment_name, Date),
  P:Q
)
write_csv(test_out, file = "./data/data_test_camels_de.csv")