-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path06_prepare_cnn_input.R
236 lines (215 loc) · 9.79 KB
/
06_prepare_cnn_input.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
################################################################################
################################################################################
# PREPARE CNN INPUT
################################################################################
################################################################################
# load packages
library(sf)
library(raster)
# set paths
basedir <- "C:/Users/Zoe/Documents/understory_classification"
path_clips <- paste0(basedir, "/4_Daten/clips_2cm_unscaled") # input
path_output <- paste0(basedir, "/4_Daten/model_input_2cm_standardized") # output
path_plot <- paste0(basedir, "/4_Daten/vegetation/Export_ODK_clean_checked_filtered_no_overlap.kml") # input
path_values <- paste0(path_clips, "/raster_values_labels_unscaled.csv") # output
# set parameters
crs_raster_las <- "+proj=utm +zone=32 +ellps=WGS84 +units=m +vunits=m +no_defs"
raster_amount <- length(c(
"ortho", "anisotropy_max", "curvature_max", "linearity_max",
"linearity_sd", "planarity_mean", "planarity_sd", "nDSM",
"point_density", "reflectance_mean", "reflectance_sd"
))
# set folder names for each input type
tls_set <- c("nDSM", "reflectance", "point_density")
rgb_set <- c("ortho")
geo_set <- c("geometry")
all_set <- c(tls_set, rgb_set, geo_set)
# set names and folder names for each combination
combo_1 <- c()
combo_1$folders <- c(tls_set)
combo_1$name <- "tls"
#
combo_2 <- c() # maybe this combination is unnecessary
combo_2$folders <- c(tls_set, rgb_set)
combo_2$name <- "tls_rgb"
#
combo_3 <- c()
combo_3$folders <- c(tls_set, geo_set)
combo_3$name <- "tls_geo"
#
combo_4 <- c()
combo_4$folders <- c(tls_set, rgb_set, geo_set)
combo_4$name <- "tls_rgb_geo"
################################################################################
# HELPER FUNCTIONS
################################################################################
check_create_dir <- function(path) {
# checks if directory exists
# if not, creates it
if (!dir.exists(path)) {
dir.create(path)
}
}
################################################################################
get_list_full_plots <- function(clip_dir, raster_amount) {
# returns list of plots for which all types of rasters exist
# get all files in clip folder
clip_list <- list.files(clip_dir, pattern = "[.]tif", recursive = TRUE, full.names = TRUE)
# extract all unique plot numbers
plot_IDs <- c()
for (clip_path in clip_list) {
parts <- strsplit(strsplit(clip_path, "[.]")[[1]][1], "_")[[1]]
plot <- as.numeric(parts[length(parts)])
plot_IDs <- c(plot_IDs, plot)
}
plot_IDs <- sort(unique(plot_IDs))
# loop through plot numbers and save those with all rasters
full_plot_IDs <- c()
for (plot_ID in plot_IDs) {
n_raster <- length(clip_list[grepl(paste0("_", plot_ID, ".tif"), clip_list)])
if (n_raster == raster_amount) {
full_plot_IDs <- c(full_plot_IDs, plot_ID)
}
}
return(full_plot_IDs)
}
################################################################################
clip_value_csv <- function(plot_path, clip_dir, output_path) {
# create csv with all unscaled raster clip values
# empty vector for raster values
values <- c()
# load plots
vegetation <- st_read(plot_path)
labels <- unique(vegetation$Name)
labels <- labels[!(labels %in% c("rock", "grass"))]
# get all raster paths
clip_paths <- list.files(clip_dir, pattern = "[.]tif", recursive = TRUE, full.names = TRUE)
clip_paths <- clip_paths[!grepl("DTM", clip_paths)]
# make list with plot_ID & vegetation_ID and labels
label_ID_lookup <- list()
for (label in labels) {
label_subset <- vegetation$Description[vegetation$Name == label]
plot_IDs <- as.numeric(lapply(strsplit(label_subset, "veg_ID: "), tail, n = 1))
label_ID_lookup[[label]] <- plot_IDs
}
# loop through clip types
clip_types <- unique(sapply(strsplit(basename(clip_paths), "_area"), "[[", 1))
for (type in clip_types) {
print(paste0("type: ", type))
clip_type_paths <- clip_paths[grepl(type, clip_paths)]
for (label in labels) {
print(paste0("label: ", label))
label_IDs <- label_ID_lookup[[label]]
label_clip_type_paths <- clip_type_paths[grepl(paste(paste0("_", label_IDs, "[.]tif"), collapse = "|"), clip_type_paths)]
# loop through clips
for (path in label_clip_type_paths) {
raster <- stack(path)
clip_label_vals <- values(raster)
if (type != "ortho") {
values <- rbind(values, data.frame("type" = type, "label" = label, "values" = as.numeric(clip_label_vals)))
} else {
values <- rbind(values, data.frame("type" = "R", "label" = label, "values" = as.numeric(clip_label_vals[, 1])))
values <- rbind(values, data.frame("type" = "G", "label" = label, "values" = as.numeric(clip_label_vals[, 2])))
values <- rbind(values, data.frame("type" = "B", "label" = label, "values" = as.numeric(clip_label_vals[, 3])))
}
}
}
}
# save values
write.csv(values, output_path, row.names = F)
return(values)
}
################################################################################
rescale_values <- function(values_path) {
# creates lookup table for normalization / standardization
values <- read.csv(values_path)
lookup <- list()
types <- unique(values$type)
for (type in types) {
lookup[[paste0(type, "_min")]] <- min(values$values[values$type == type], na.rm = TRUE)
lookup[[paste0(type, "_max")]] <- max(values$values[values$type == type], na.rm = TRUE)
lookup[[paste0(type, "_mean")]] <- mean(values$values[values$type == type], na.rm = TRUE)
lookup[[paste0(type, "_sd")]] <- sd(values$values[values$type == type], na.rm = TRUE)
}
return(lookup)
}
################################################################################
# STACK & SAVE & SORT CLIPS
################################################################################
stack_save_clips <- function(clip_dir, plot_path, values_path, output_dir, selection_rasters, selection_IDs, normalize = TRUE, standardize = FALSE) {
# stacks all rasters for each clip & saves it in a folder based on the label
check_create_dir(output_dir)
# load lookup table
if (normalize & standardize) {
stop("either normalize OR standardize")
}
if (normalize | standardize) {
lookup_table <- rescale_values(values_path)
}
# get all clip paths of the selected raster types
clip_list <- list.files(clip_dir, pattern = "[.]tif", recursive = TRUE)
contained <- c()
for (clip_path in clip_list) {
contained <- c(contained, strsplit(clip_path, "/")[[1]][1] %in% selection_rasters)
}
clip_list <- clip_list[contained]
# load vegetation plots & create one subfolder for each label
plots <- st_read(plot_path)
# extract all plot IDs from vegetation plots
all_IDs <- c()
for (idx in 1:nrow(plots)) {
all_IDs <- c(all_IDs, as.numeric(strsplit(plots$Description[idx], " ")[[1]][4]))
}
# loop through all selected plot IDs
for (plot_ID in selection_IDs) {
# get row from vegetation plots where this plot ID is same
plot_idx <- which(all_IDs == plot_ID)
# load & stack all clips of the plot
plot_paths <- sort(clip_list[grepl(paste0("_", plot_ID, ".tif"), clip_list)])
plot_names <- unlist(lapply(strsplit(basename(plot_paths), "_area"), head, n = 1))
plot_rasters <- list()
for (idx in 1:length(plot_paths)) {
raster_bands <- stack(paste0(clip_dir, "/", plot_paths[idx]))
# rescale values
bands <- if (plot_names[idx] != "ortho") c(plot_names[idx]) else c("R", "G", "B")
for (i in 1:length(bands)) {
if (normalize | standardize) {
if (normalize) {
min_val <- lookup_table[[paste0(bands[i], "_min")]]
max_val <- lookup_table[[paste0(bands[i], "_max")]]
raster_bands[[i]] <- (raster_bands[[i]] - min_val) / (max_val - min_val)
}
if (standardize) {
mean_val <- lookup_table[[paste0(bands[i], "_mean")]]
sd_val <- lookup_table[[paste0(bands[i], "_sd")]]
raster_bands[[i]] <- scale(raster_bands[[i]], center = mean_val, scale = sd_val)
}
}
}
plot_rasters[[idx]] <- raster_bands
}
plot_stack <- stack(plot_rasters)
# determine label & save in according folder
plot_label <- plots$Name[plot_idx]
writeRaster(plot_stack, paste0(output_dir, "/", plot_label, "_", plot_ID, ".tif"))
}
}
################################################################################
# EXECUTION
################################################################################
# create output folder
check_create_dir(path_output)
# save clip raster values in a file for rescaling, takes a long time, do once
# clip_value_csv(path_plot, path_clips, path_values)
# get plot_IDs where all rasters are available
full_IDs <- get_list_full_plots(path_clips, raster_amount)
# scale, stack, save raster clips
stack_save_clips(path_clips, path_plot, path_values, paste0(path_output, "/", combo_1$name),
combo_1$folders, full_IDs, normalize = FALSE, standardize = TRUE)
stack_save_clips(path_clips, path_plot, path_values, paste0(path_output, "/", combo_2$name),
combo_2$folders, full_IDs, normalize = FALSE, standardize = TRUE)
stack_save_clips(path_clips, path_plot, path_values, paste0(path_output, "/", combo_3$name),
combo_3$folders, full_IDs, normalize = FALSE, standardize = TRUE)
stack_save_clips(path_clips, path_plot, path_values, paste0(path_output, "/", combo_4$name),
combo_4$folders, full_IDs, normalize = FALSE, standardize = TRUE)
################################################################################