diff --git a/src/functions/add_uncertainties_chem.R b/src/functions/add_uncertainties_chem.R index 84c6fec..50d860b 100644 --- a/src/functions/add_uncertainties_chem.R +++ b/src/functions/add_uncertainties_chem.R @@ -26,19 +26,46 @@ add_uncertainties_chem <- function(survey_form, # Uncertainties (in the required parameter units) # TOC: + # layer_type organic and survey_year < 2000: +-11.8 g kg-1 # layer_type organic and survey_year >= 2000: +- 5.2 g kg-1 # layer_type mineral and survey_year < 2000: +- 3.5 g kg-1 # layer_type mineral and survey_year >= 2000: +- 1.5 g kg-1 + # Based on BioSoil reanalysis of '90s and '00s soil samples by a + # central lab + # Hiederer R, Durrant Houston T, Micheli E. Evaluation of BioSoil + # Demonstration Project - Soil Data Analysis. EUR 24729 EN. Luxembourg + # (Luxembourg): Publications Office of the European Union; 2011. JRC63301 + + # Total N: + + lqa <- get_env(paste0(unlist(str_split(survey_form, "_"))[1], + "_lqa")) %>% + filter(code_parameter == "Total_N") %>% # "P_extr", "Total_N", "org_C" + filter(!is.na(control_chart_std)) %>% + arrange(control_chart_std) %>% + pull(control_chart_std) %>% + # There seem to be some extremely high values which do not seem + # reliable. + # Use the upper 80 % quantile (because higher ones may be unreliable, + # e.g. due to calibration errors or mistakes in calculations) + quantile(0.80) + + # 4 g kg-1 + + uncertainties <- data.frame( - parameter_som = c("organic_carbon_total"), - parameter_pfh = c("horizon_c_organic_total"), - org_before_2000 = c(11.8), - org_after_2000 = c(5.2), - min_before_2000 = c(3.5), - min_after_2000 = c(1.5), - source = c("Confidence interval of central lab reanalyses by JRC (2011)") + parameter_som = c("organic_carbon_total", "n_total", + "extrac_p"), + parameter_pfh = c("horizon_c_organic_total", "horizon_n_total", + NA), + org_before_2000 = c(11.8, 4, 4.9), + org_after_2000 = c(5.2, 4, 4.9), + min_before_2000 = c(3.5, 4, 4.9), + min_after_2000 = c(1.5, 4, 4.9), + source = c("Confidence interval of central lab reanalyses by JRC (2011)", + "LQA", "LQA") ) @@ -47,7 +74,7 @@ add_uncertainties_chem <- function(survey_form, if (survey_form_type == "som") { if (is.null(parameters)) { - parameters <- c("organic_carbon_total") + parameters <- c("organic_carbon_total", "n_total", "extrac_p") } assertthat::assert_that( @@ -61,7 +88,7 @@ add_uncertainties_chem <- function(survey_form, if (survey_form_type == "pfh") { if (is.null(parameters)) { - parameters <- c("horizon_c_organic_total") + parameters <- c("horizon_c_organic_total", "horizon_n_total") } assertthat::assert_that( @@ -80,6 +107,17 @@ add_uncertainties_chem <- function(survey_form, parameter_max_i <- paste0(parameter_i, "_max") + # Theoretically maximum possible value + + max_i <- read.csv2("./data/additional_data/ranges_qaqc.csv") %>% + filter(!is.na(max_possible)) %>% + filter(parameter_som == parameter_i | + parameter_pfh == parameter_i) %>% + distinct(max_possible) %>% + pull(max_possible) + + assertthat::assert_that(length(max_i) == 1) + if (!parameter_min_i %in% names(df)) { @@ -144,13 +182,13 @@ add_uncertainties_chem <- function(survey_form, !is.na(.data[[parameter_i]]), case_when( layer_type %in% c("forest_floor", "peat") & survey_year < 2000 ~ - pmin(.data[[parameter_max_i]] + unc_i$org_before_2000, 1000), + pmin(.data[[parameter_max_i]] + unc_i$org_before_2000, max_i), layer_type %in% c("forest_floor", "peat") & survey_year >= 2000 ~ - pmin(.data[[parameter_max_i]] + unc_i$org_after_2000, 1000), + pmin(.data[[parameter_max_i]] + unc_i$org_after_2000, max_i), layer_type == "mineral" & survey_year < 2000 ~ - pmin(.data[[parameter_max_i]] + unc_i$min_before_2000, 1000), + pmin(.data[[parameter_max_i]] + unc_i$min_before_2000, max_i), layer_type == "mineral" & survey_year >= 2000 ~ - pmin(.data[[parameter_max_i]] + unc_i$min_after_2000, 1000)), + pmin(.data[[parameter_max_i]] + unc_i$min_after_2000, max_i)), NA_integer_)) %>% relocate({{parameter_min_i}}, .after = {{parameter_i}}) %>% relocate({{parameter_max_i}}, .after = {{parameter_min_i}}) diff --git a/src/functions/depth_join.R b/src/functions/depth_join.R index b42dffc..bca93f9 100644 --- a/src/functions/depth_join.R +++ b/src/functions/depth_join.R @@ -125,7 +125,10 @@ depth_join <- function(df1, } if (mode == "time_specific_ff_concentrations") { - possible_parameters <- "organic_carbon_total" + possible_parameters <- c("organic_carbon_total", + "n_total", + "extrac_p", + "extrac_s") } parameters <- unique( diff --git a/src/functions/gapfill_from_old_data.R b/src/functions/gapfill_from_old_data.R index 0572468..71c9fa7 100644 --- a/src/functions/gapfill_from_old_data.R +++ b/src/functions/gapfill_from_old_data.R @@ -4,6 +4,7 @@ gapfill_from_old_data <- function(survey_form, save_to_env = FALSE, parameters = c("bulk_density", "organic_carbon_total", + "n_total", "organic_layer_weight", "coarse_fragment_vol", "part_size_clay", @@ -408,6 +409,7 @@ gapfill_from_old_data <- function(survey_form, assertthat::assert_that( identical(parameters, c("bulk_density", "organic_carbon_total", + "n_total", "organic_layer_weight", "coarse_fragment_vol", "part_size_clay", @@ -460,6 +462,7 @@ gapfill_from_old_data <- function(survey_form, select(unique_layer_repetition, bulk_density, organic_carbon_total, + n_total, organic_layer_weight, coarse_fragment_vol, part_size_clay, @@ -467,6 +470,7 @@ gapfill_from_old_data <- function(survey_form, part_size_sand) %>% rename(bulk_density_afscdb = bulk_density, organic_carbon_total_afscdb = organic_carbon_total, + n_total_afscdb = n_total, organic_layer_weight_afscdb = organic_layer_weight, coarse_fragment_vol_afscdb = coarse_fragment_vol, part_size_clay_afscdb = part_size_clay, @@ -504,6 +508,16 @@ gapfill_from_old_data <- function(survey_form, organic_carbon_total = coalesce( .data$organic_carbon_total, .data$organic_carbon_total_afscdb)) %>% + # N total + mutate( + n_total_source = case_when( + !is.na(n_total_source) ~ n_total_source, + !is.na(.data$n_total) ~ "som (same year)", + !is.na(.data$n_total_afscdb) ~ "FSCDB.LII (2012)", + TRUE ~ NA_character_), + n_total = coalesce( + .data$n_total, + .data$n_total_afscdb)) %>% # Organic layer weight mutate( organic_layer_weight_source = case_when( @@ -570,6 +584,27 @@ gapfill_from_old_data <- function(survey_form, filter(is.na(.data$unique_survey_so_som)) %>% select(-unique_survey_so_som, -unique_layer_repetition) + # Some Romanian records were missing in both databases, + # but the partner confirmed that they should be inserted. + + layers_to_check <- c("52_2009_13_M12_3", + "52_2009_13_M24_3", + "52_2009_13_M48_3") + + if (all(!layers_to_check %in% df$unique_layer_repetition)) { + + assertthat::assert_that( + all(layers_to_check %in% so_som_afscdb$unique_layer_repetition)) + + so_som_missing_records <- rbind( + so_som_missing_records, + so_som_afscdb %>% + filter(unique_layer_repetition %in% layers_to_check) %>% + select(-unique_survey_so_som, -unique_layer_repetition) + ) + } + + # If there are any unique surveys in afscdb which are missing in so_som if (nrow(so_som_missing_records) > 0) { @@ -656,6 +691,7 @@ gapfill_from_old_data <- function(survey_form, "Record inserted from FSCDB.LII."), bulk_density_afscdb = bulk_density, organic_carbon_total_afscdb = organic_carbon_total, + n_total_afscdb = n_total, organic_layer_weight_afscdb = organic_layer_weight, coarse_fragment_vol_afscdb = coarse_fragment_vol, part_size_clay_afscdb = part_size_clay, @@ -747,7 +783,7 @@ gapfill_from_old_data <- function(survey_form, n_total_orig, origin_merged, origin_merge_info, - bulk_density_afscdb, organic_carbon_total_afscdb, + bulk_density_afscdb, organic_carbon_total_afscdb, n_total_afscdb, organic_layer_weight_afscdb, coarse_fragment_vol_afscdb, part_size_clay_afscdb, part_size_silt_afscdb, part_size_sand_afscdb) @@ -789,15 +825,17 @@ gapfill_from_old_data <- function(survey_form, code_plot = ifelse(code_country == 58 & code_plot == 2255, 255, code_plot)) %>% - mutate(repetition = ifelse(code_country == 58 & code_plot == 2188, - 2, - plot_id), - plot_id = ifelse(code_country == 58 & code_plot == 2188, - "58_188", - plot_id), - code_plot = ifelse(code_country == 58 & code_plot == 2188, - 188, - code_plot)) %>% + # do not apply this because then, there seem to be two profiles + # with 58_188 as plot_id + # mutate(repetition = ifelse(code_country == 58 & code_plot == 2188, + # 2, + # repetition), + # plot_id = ifelse(code_country == 58 & code_plot == 2188, + # "58_188", + # plot_id), + # code_plot = ifelse(code_country == 58 & code_plot == 2188, + # 188, + # code_plot)) %>% mutate( unique_survey = paste0(code_country, "_", survey_year, "_", @@ -1173,6 +1211,7 @@ gapfill_from_old_data <- function(survey_form, assertthat::assert_that( identical(parameters, c("bulk_density", "organic_carbon_total", + "n_total", "organic_layer_weight", "coarse_fragment_vol", "part_size_clay", @@ -1223,10 +1262,12 @@ gapfill_from_old_data <- function(survey_form, select(unique_layer_repetition_s1_som, bulk_density, organic_carbon_total, + n_total, organic_layer_weight, coarse_fragment_vol) %>% rename(bulk_density_fscdb = bulk_density, organic_carbon_total_fscdb = organic_carbon_total, + n_total_fscdb = n_total, organic_layer_weight_fscdb = organic_layer_weight, coarse_fragment_vol_fscdb = coarse_fragment_vol), by = join_by("unique_layer_repetition" == @@ -1262,6 +1303,16 @@ gapfill_from_old_data <- function(survey_form, organic_carbon_total = coalesce( .data$organic_carbon_total, .data$organic_carbon_total_fscdb)) %>% + # N total + mutate( + n_total_source = case_when( + !is.na(n_total_source) ~ n_total_source, + !is.na(.data$n_total) ~ "som (same year)", + !is.na(.data$n_total_fscdb) ~ "FSCDB.LI (2002)", + TRUE ~ NA_character_), + n_total = coalesce( + .data$n_total, + .data$n_total_fscdb)) %>% # Organic layer weight mutate( organic_layer_weight_source = case_when( @@ -1365,6 +1416,7 @@ gapfill_from_old_data <- function(survey_form, other_obs = "Record inserted from FSCDB.LI.", bulk_density_fscdb = bulk_density, organic_carbon_total_fscdb = organic_carbon_total, + n_total_fscdb = n_total, organic_layer_weight_fscdb = organic_layer_weight, coarse_fragment_vol_fscdb = coarse_fragment_vol, part_size_clay_fscdb = part_size_clay, @@ -1428,6 +1480,7 @@ gapfill_from_old_data <- function(survey_form, coarse_fragment_vol_orig, organic_layer_weight_orig, organic_carbon_total_orig, n_total_orig, bulk_density_fscdb, organic_carbon_total_fscdb, + n_total_fscdb, organic_layer_weight_fscdb, coarse_fragment_vol_fscdb) assertthat::assert_that(all(names(df) == names(s1_som_fscdb_to_add))) diff --git a/src/functions/gapfill_internally.R b/src/functions/gapfill_internally.R index cb375eb..c7f1e1b 100644 --- a/src/functions/gapfill_internally.R +++ b/src/functions/gapfill_internally.R @@ -45,6 +45,8 @@ gapfill_internally <- function(survey_form, rename(code_layer = horizon_master) %>% rename(organic_carbon_total = horizon_c_organic_total) %>% rename(organic_carbon_total_source = horizon_c_organic_total_source) %>% + rename(n_total = horizon_n_total) %>% + rename(n_total_source = horizon_n_total_source) %>% rename(unique_survey_repetition = unique_survey_profile) } @@ -1540,7 +1542,8 @@ gapfill_internally <- function(survey_form, rename(code_layer = horizon_master) %>% rename(unique_survey_repetition = unique_survey_profile) %>% # Rename analytical parameters to join - rename(organic_carbon_total = horizon_c_organic_total) + rename(organic_carbon_total = horizon_c_organic_total) %>% + rename(n_total = horizon_n_total) cat(paste0(" \nAdd 'pfh' forest floor analytical data to '", survey_form, "'.\n")) @@ -1570,7 +1573,9 @@ gapfill_internally <- function(survey_form, # layers known, than there are unique below-ground repetitions (profiles): # count_all_ff_nona < count_distinct_bg_up - profs_potential <- df %>% + # Carbon + + profs_potential_c <- df %>% # Filter out redundant layers filter(!is.na(layer_number)) %>% # Filter out forest floor layers without olw @@ -1596,7 +1601,7 @@ gapfill_internally <- function(survey_form, NA_real_)) %>% filter(count_bg_layers_nona > 0) - plots_gapfill <- profs_potential %>% + plots_gapfill_c <- profs_potential_c %>% # Group per unique_survey group_by(unique_survey, code_country, code_plot) %>% reframe(all_any_ff_na = all(any_ff_na == TRUE), @@ -1609,10 +1614,59 @@ gapfill_internally <- function(survey_form, any_ff_na_nona == TRUE) %>% filter(count_all_ff_nona < count_distinct_bg_up) - profs_gapfill <- profs_potential %>% - filter(unique_survey %in% plots_gapfill$unique_survey) %>% + profs_gapfill_c <- profs_potential_c %>% + filter(unique_survey %in% plots_gapfill_c$unique_survey) %>% + filter(any_ff_na == TRUE) + + + # Nitrogen + + profs_potential_n <- df %>% + # Filter out redundant layers + filter(!is.na(layer_number)) %>% + # Filter out forest floor layers without olw + filter(layer_type != "forest_floor" | + (layer_type == "forest_floor" & + !is.na(organic_layer_weight))) %>% + # Group per profile + group_by(unique_survey_repetition, + unique_survey, code_country, code_plot) %>% + reframe(any_ff_na = (any(.data$layer_type == "forest_floor" & + is.na(.data$n_total))), + all_ff_nona = + all(!is.na(n_total[ + which(layer_type == "forest_floor")])), + count_bg_layers = + length(which(.data$layer_type != "forest_floor")), + count_bg_layers_nona = + length(which(.data$layer_type != "forest_floor" & + !is.na(.data$n_total))), + n_total_bg_up = + ifelse(any(layer_number_bg == 1), + n_total[which(layer_number_bg == 1)], + NA_real_)) %>% + filter(count_bg_layers_nona > 0) + + plots_gapfill_n <- profs_potential_n %>% + # Group per unique_survey + group_by(unique_survey, code_country, code_plot) %>% + reframe(all_any_ff_na = all(any_ff_na == TRUE), + any_ff_na_nona = + any(any_ff_na == TRUE) & any(all_ff_nona == TRUE), + count_all_ff_nona = length(which(.data$all_ff_nona == TRUE)), + count_distinct_bg_up = n_distinct(n_total_bg_up, + na.rm = TRUE)) %>% + filter(all_any_ff_na == TRUE | + any_ff_na_nona == TRUE) %>% + filter(count_all_ff_nona < count_distinct_bg_up) + + profs_gapfill_n <- profs_potential_n %>% + filter(unique_survey %in% plots_gapfill_n$unique_survey) %>% filter(any_ff_na == TRUE) + + + # Combine # som @@ -1620,11 +1674,12 @@ gapfill_internally <- function(survey_form, if (survey_form_type == "som") { df <- df %>% + # Carbon mutate( organic_carbon_total_source = ifelse( is.na(organic_carbon_total) & layer_type == "forest_floor" & - unique_survey %in% profs_gapfill$unique_survey, + unique_survey %in% profs_gapfill_c$unique_survey, # Sources for gap-filling case_when( !is.na(som_organic_carbon_total) ~ @@ -1635,13 +1690,36 @@ gapfill_internally <- function(survey_form, organic_carbon_total = ifelse( is.na(organic_carbon_total) & layer_type == "forest_floor" & - unique_survey %in% profs_gapfill$unique_survey, + unique_survey %in% profs_gapfill_c$unique_survey, # Gap-fill coalesce( som_organic_carbon_total, pfh_organic_carbon_total), # Else, don't change - organic_carbon_total)) + organic_carbon_total)) %>% + # Nitrogen + mutate( + n_total_source = ifelse( + is.na(n_total) & + layer_type == "forest_floor" & + unique_survey %in% profs_gapfill_n$unique_survey, + # Sources for gap-filling + case_when( + !is.na(som_n_total) ~ + "som (other repetitions in same period)", + !is.na(pfh_n_total) ~ "pfh (same period)"), + # Original data are kept + n_total_source), + n_total = ifelse( + is.na(n_total) & + layer_type == "forest_floor" & + unique_survey %in% profs_gapfill_n$unique_survey, + # Gap-fill + coalesce( + som_n_total, + pfh_n_total), + # Else, don't change + n_total)) } @@ -1649,11 +1727,12 @@ gapfill_internally <- function(survey_form, if (survey_form_type == "pfh") { df <- df %>% + # carbon mutate( organic_carbon_total_source = ifelse( is.na(organic_carbon_total) & layer_type == "forest_floor" & - unique_survey %in% profs_gapfill$unique_survey, + unique_survey %in% profs_gapfill_c$unique_survey, # Sources for gap-filling case_when( !is.na(pfh_organic_carbon_total) ~ @@ -1664,13 +1743,36 @@ gapfill_internally <- function(survey_form, organic_carbon_total = ifelse( is.na(organic_carbon_total) & layer_type == "forest_floor" & - unique_survey %in% profs_gapfill$unique_survey, + unique_survey %in% profs_gapfill_c$unique_survey, # Gap-fill coalesce( pfh_organic_carbon_total, som_organic_carbon_total), # Else, don't change - organic_carbon_total)) + organic_carbon_total)) %>% + # nitrogen + mutate( + n_total_source = ifelse( + is.na(n_total) & + layer_type == "forest_floor" & + unique_survey %in% profs_gapfill_n$unique_survey, + # Sources for gap-filling + case_when( + !is.na(pfh_n_total) ~ + "pfh (other repetitions in same period)", + !is.na(som_n_total) ~ "som (same period)"), + # Original data are kept + n_total_source), + n_total = ifelse( + is.na(n_total) & + layer_type == "forest_floor" & + unique_survey %in% profs_gapfill_n$unique_survey, + # Gap-fill + coalesce( + pfh_n_total, + som_n_total), + # Else, don't change + n_total)) } } # End of "if gapfill_ff_concentrations is true" @@ -1689,6 +1791,8 @@ gapfill_internally <- function(survey_form, rename(horizon_master = code_layer) %>% rename(horizon_c_organic_total = organic_carbon_total) %>% rename(horizon_c_organic_total_source = organic_carbon_total_source) %>% + rename(horizon_n_total = n_total) %>% + rename(horizon_n_total_source = n_total_source) %>% rename(unique_survey_profile = unique_survey_repetition) } diff --git a/src/functions/graph_interval.R b/src/functions/graph_interval.R index 2210f61..e5f4fee 100644 --- a/src/functions/graph_interval.R +++ b/src/functions/graph_interval.R @@ -3,10 +3,13 @@ graph_interval <- function(data, response, group, + shorter_var_name = "c", + src_survey = "s1_so", aspect.ratio = NULL, width = 6.81, mode = "light", version = "", + x_min = 0, x_max = 100, number_of_groups = NULL, x_title = NULL, @@ -25,6 +28,32 @@ graph_interval <- function(data, col_front <- "black" } + if (shorter_var_name == "c") { + var_name <- "Carbon" + unit_pre <- "t" + } else if (shorter_var_name == "n") { + var_name <- "Nitrogen" + unit_pre <- "t" + } else if (shorter_var_name == "extrac_p") { + var_name <- "Extractable phosphorus" + unit_pre <- "kg" + } + + if (shorter_var_name == "c") { + var_letter <- "C" + } else if (shorter_var_name == "n") { + var_letter <- "N" + } else if (shorter_var_name == "extrac_p") { + var_letter <- "P" + } + + if (src_survey == "s1_so") { + src_survey_name <- "Level I and Level II" + } else if (src_survey == "s1") { + src_survey_name <- "Level I" + } else if (src_survey == "so") { + src_survey_name <- "Level II" + } if (is.null(path_export)) { path_export <- "./output/stocks/" @@ -102,9 +131,22 @@ graph_interval <- function(data, decreasing = FALSE)]) response_name <- case_when( - response == "stock_forest_floor" ~ "(forest floor)", - response == "stock" ~ "(forest floor + soil)", - response == "stock_topsoil" ~ "(forest floor + topsoil)", + response == "stock_forest_floor" ~ + paste0("**", var_name, " stock** (forest floor)"), + response == "stock" ~ + paste0("**", var_name, " stock** (forest floor + soil)"), + response == "stock_topsoil" ~ + paste0("**", var_name, " stock** (forest floor + topsoil)"), + response == "di_ff_top" ~ + paste0("**", var_letter, + "DI** (*forest floor* / *below-ground topsoil* ", + str_to_lower(var_name), + " stock)"), + response == "di_top_soil" ~ + paste0("**", var_letter, + "DI** (*below-ground topsoil* / *below-ground* ", + str_to_lower(var_name), + " stock)"), .default = "") group_name <- case_when( @@ -150,7 +192,7 @@ p <- ggplot() + x = Mean, shape = type), color = "white", size = 1.2) + - coord_cartesian(xlim = c(0, x_max), + coord_cartesian(xlim = c(x_min, x_max), expand = FALSE) + scale_color_viridis_d(option = "viridis", direction = -1, @@ -165,15 +207,14 @@ p <- ggplot() + scale_shape_manual(values = c("Mean + 95 % CI\n(bootstrapped)" = 16), name = "") + labs( - title = paste0("**Carbon stock** ", - response_name, + title = paste0(response_name, "
", "as a function of **", group_name, "**"), - subtitle = paste0("(ICP Forests Level I and Level II)"), - x = paste0("**Carbon stock** ", + subtitle = paste0("(ICP Forests ", src_survey_name, ")"), + x = paste0("**", var_name, " stock** ", "(mean over time)", " ", #"
", - "(t C ha-1)"), + "(", unit_pre, " ", var_letter, " ha-1)"), y = NULL, # "Mean + 95 % conf. int. mean (bootstrapped)" caption = caption) + diff --git a/src/functions/tidy.R b/src/functions/tidy.R index ef04616..8056c72 100644 --- a/src/functions/tidy.R +++ b/src/functions/tidy.R @@ -38,6 +38,10 @@ tidy <- function(survey_form, "coarse_fragment_vol_fscdb", "horizon_bulk_dens_est", "horizon_coarse_weight", + "n_total_fscdb", + "n_total_afscdb", + "pfh_n_total", + "som_n_total", "organic_carbon_total_afscdb", "organic_layer_weight_afscdb", "organic_layer_weight_bd", diff --git a/src/stock_calculations/functions/check_stock_plausibility.R b/src/stock_calculations/functions/check_stock_plausibility.R index 39bb92c..127fc7c 100644 --- a/src/stock_calculations/functions/check_stock_plausibility.R +++ b/src/stock_calculations/functions/check_stock_plausibility.R @@ -1,5 +1,8 @@ -check_stock_plausibility <- function(data_frame) { +check_stock_plausibility <- function(data_frame, + parameter = "organic_carbon_total", + shorter_var_name = "c", + survey_form_orig) { stopifnot(require("tidyverse"), require("assertthat"), @@ -13,14 +16,30 @@ check_stock_plausibility <- function(data_frame) { require("triangle"), require("broom")) - cat(paste0(" \nCheck plausibility of stocks\n")) + if (length(unlist(strsplit(survey_form_orig, "_"))) == 2) { + + s1_pre <- paste0("s1_", unlist(strsplit(survey_form_orig, "_"))[2], "_") + so_pre <- paste0("so_", unlist(strsplit(survey_form_orig, "_"))[2], "_") + + } else if (length(unlist(strsplit(survey_form_orig, "_"))) == 1) { + + s1_pre <- "s1_" + so_pre <- "so_" + } + + + + + cat(paste0(" \nCheck plausibility of '", parameter, "' stocks\n")) source("./src/functions/get_env.R") df <- data_frame %>% mutate( - plausible_fscc = TRUE, - implausible_fscc_reason = NA) + plaus_total_fscc = TRUE, + plaus_forest_floor_fscc = TRUE, + use_stock_topsoil = FALSE, + implaus_fscc_reason = NA) # Assert that these variables exist @@ -47,424 +66,1525 @@ check_stock_plausibility <- function(data_frame) { # Retrieve data per layer - df_layer <- get_env(paste0(code_survey, "_layers")) + df_layer <- get_env(paste0(survey_form_orig, "_layers")) - # Retrieve data per profile - profile_stocks <- get_env(paste0(code_survey, "_profile_c_stocks")) + # If TOC ---- - # Define plausible limits ---- + if (parameter == "organic_carbon_total") { - if (exists("plot_c_stocks_summ")) { - plaus_range_stock <- get_env("plot_c_stocks_summ") %>% - pull(stock) %>% - quantile(c(0.005, 0.995), na.rm = TRUE) %>% - round(1) + # Retrieve data per profile - plaus_range_stock_change <- get_env("plot_c_stocks_summ") %>% - pull(stock_change) %>% - quantile(c(0.05, 0.95), na.rm = TRUE) %>% - round(1) # approximately +-5 t C ha-1 year-1 + profile_stocks <- get_env(paste0(code_survey, "_profile_c_stocks")) - plaus_range_stock_change_rel <- get_env("plot_c_stocks_summ") %>% - pull(stock_change_rel) %>% - quantile(c(0.005, 0.995), na.rm = TRUE) %>% - round(1) # approximately +-10 % year-1 - } + # Define plausible limits ---- - plaus_range_stock <- c(21, 894) - plaus_range_stock_change <- c(-5, 5) - plaus_range_stock_change_rel <- c(-10, 10) - - - if (exists("so_layers") && - exists("s1_layers")) { - - plaus_range_olw <- bind_rows(get_env("so_layers"), - get_env("s1_layers")) %>% - group_by(plot_id, survey_form, survey_year) %>% - reframe(organic_layer_weight = - sum(organic_layer_weight, na.rm = TRUE)) %>% - arrange(organic_layer_weight) %>% - pull(organic_layer_weight) %>% - quantile(c(0.995), na.rm = TRUE) %>% - round(1) - - plaus_range_toc_ff <- bind_rows(get_env("so_layers"), - get_env("s1_layers")) %>% - filter(layer_type == "forest_floor") %>% - pull(parameter_for_stock) %>% - quantile(c(0.005, 0.995), na.rm = TRUE) %>% - round(1) - - plaus_range_toc_bg <- bind_rows(get_env("so_layers"), - get_env("s1_layers")) %>% - group_by(plot_id, survey_form, survey_year) %>% - reframe(toc_max = ifelse( - any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), - max(parameter_for_stock[which(layer_type != "forest_floor")], - na.rm = TRUE), - NA_real_)) %>% - pull(toc_max) %>% - quantile(c(0.005), na.rm = TRUE) %>% - round(1) - - plaus_range_c_density <- bind_rows(get_env("so_layers"), - get_env("s1_layers")) %>% - pull(density) %>% - quantile(c(0.975), na.rm = TRUE) %>% - round(1) - - plaus_range_toc_ff_change <- bind_rows(get_env("so_layers"), - get_env("s1_layers")) %>% - group_by(plot_id, survey_form, survey_year) %>% - reframe(toc_avg = ifelse( - any(!is.na(parameter_for_stock[which(layer_type == "forest_floor")])), - mean(parameter_for_stock[which(layer_type == "forest_floor")], - na.rm = TRUE), - NA_real_)) %>% - filter(!is.na(toc_avg)) %>% - # Calculate slope of linear regression - # (the TOc should not change too drastically) - group_by(plot_id, survey_form) %>% - filter(n() >= 2) %>% # Filter groups with at least two records - do(tidy(lm(toc_avg ~ survey_year, data = .))) %>% - filter(term == "survey_year") %>% - select(plot_id, survey_form, estimate) %>% - rename(slope_toc_ff = estimate) %>% - pull(slope_toc_ff) %>% - quantile(c(0.025, 0.975), na.rm = TRUE) %>% - round(1) + if (exists("plot_c_stocks_summ")) { - } + plaus_range_stock <- get_env("plot_c_stocks_summ") %>% + pull(stock) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) - plaus_range_olw <- 143 - plaus_range_toc_ff <- c(42, 567) - plaus_range_toc_bg <- 3 - plaus_range_c_density <- 8 - plaus_range_toc_ff_change <- c(-25, 25) + plaus_range_stock_change <- get_env("plot_c_stocks_summ") %>% + pull(stock_change) %>% + quantile(c(0.05, 0.95), na.rm = TRUE) %>% + round(1) # approximately +-5 t C ha-1 year-1 - # Define rules ---- + plaus_range_stock_change_rel <- get_env("plot_c_stocks_summ") %>% + pull(stock_change_rel) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) # approximately +-10 % year-1 + } - # First set of rules: plausibility of a values - # within a certain plot x survey_year. - # Stock flagged as implausible if not met. + plaus_range_stock <- c(21, 894) + plaus_range_stock_change <- c(-5, 5) + plaus_range_stock_change_rel <- c(-10, 10) - rule_1 <- "Stock should be between 21 and 894 t C ha-1 (99 % quantile)." - rule_2 <- "Organic layer weight should be below 143 kg m-2 (99 % quantile)." - rule_3 <- paste0("Total organic carbon of forest floor should be ", - "between 42 and 567 g kg-1 (99 % quantile).") - rule_4 <- paste0("Max. below-ground total organic carbon should be ", - "above 3 g kg-1 (99 % quantile).") - rule_5 <- "Carbon density should be below 8 t C ha-1 cm-1 (~95 % quantile)." - rule_6 <- "Plots without Mull as humus form should have a forest floor." + if (exists("so_layers") && + exists("s1_layers")) { + plaus_range_olw <- bind_rows(get_env("so_layers"), + get_env("s1_layers")) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(organic_layer_weight = + sum(organic_layer_weight, na.rm = TRUE)) %>% + arrange(organic_layer_weight) %>% + pull(organic_layer_weight) %>% + quantile(c(0.975), na.rm = TRUE) %>% + round(1) - # Second set of rules: plausibility of changes between survey years - # from a certain plot. - # Stock of earliest survey year flagged as implausible if not met. + plaus_range_toc_ff <- bind_rows(get_env("so_layers"), + get_env("s1_layers")) %>% + filter(layer_type == "forest_floor") %>% + pull(parameter_for_stock) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) - rule_7 <- paste0("Annual sequestration rate should be ", - "between -5 and 5 t C ha-1 year-1 (~90 % quantile).") - rule_8 <- paste0("Rel. annual sequestration rate should be ", - "between -10 and 10 % year-1 (99 % quantile).") + plaus_range_toc_bg <- bind_rows(get_env("so_layers"), + get_env("s1_layers")) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(toc_max = ifelse( + any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), + max(parameter_for_stock[which(layer_type != "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + pull(toc_max) %>% + quantile(c(0.005), na.rm = TRUE) %>% + round(1) + + plaus_range_c_density <- bind_rows(get_env("so_layers"), + get_env("s1_layers")) %>% + pull(density) %>% + quantile(c(0.975), na.rm = TRUE) %>% + round(1) + + plaus_range_toc_ff_change <- bind_rows(get_env("so_layers"), + get_env("s1_layers")) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(toc_avg = ifelse( + any(!is.na(parameter_for_stock[which(layer_type == "forest_floor")])), + mean(parameter_for_stock[which(layer_type == "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(toc_avg)) %>% + # Calculate slope of linear regression + # (the TOc should not change too drastically) + group_by(plot_id, survey_form) %>% + filter(n() >= 2) %>% # Filter groups with at least two records + do(broom::tidy(lm(toc_avg ~ survey_year, data = .))) %>% + filter(term == "survey_year") %>% + select(plot_id, survey_form, estimate) %>% + rename(slope_toc_ff = estimate) %>% + pull(slope_toc_ff) %>% + quantile(c(0.025, 0.975), na.rm = TRUE) %>% + round(1) - rule_9 <- paste0("Change rate of mean forest floor total organic carbon ", - "should be between -25 and 25 g kg-1 year-1 ", - "(~95 % quantile).") + } + plaus_range_olw <- 64 + plaus_range_toc_ff <- c(42, 567) + plaus_range_toc_bg <- 3 + plaus_range_c_density <- 8 + plaus_range_toc_ff_change <- c(-25, 25) - # plausible_fscc - # implausible_fscc_reason + # Define rules ---- - # Evaluate for different plots ---- + ## Total/below-ground stock ---- - plot_ids <- unique(df$plot_id) + # If these rules are not met, the total stock or at least the below-ground + # stock is considered implausible, which implies that FSCC recommends not + # to use the stock data from the given plot survey. - # Set progress bar - progress_bar <- txtProgressBar(min = 0, - max = length(plot_ids), style = 3) + # First set of rules: plausibility of a values + # within a certain plot x survey_year. + # Stock flagged as implausible if not met. - for (i in seq_along(plot_ids)) { + rule_1 <- "Stock should be between 21 and 894 t C ha-1 (99 % quantile)." - vec_i <- which(df$plot_id == plot_ids[i]) + rule_2 <- paste0("Max. below-ground total organic carbon should be ", + "above 3 g kg-1 (99 % quantile).") - vec_som_i <- which(df$plot_id == plot_ids[i] & - grepl("som", df$survey_form)) + rule_3 <- "Carbon density should be below 8 t C ha-1 cm-1 (~95 % quantile)." - vec_pfh_i <- which(df$plot_id == plot_ids[i] & - grepl("pfh", df$survey_form)) + # Second set of rules: plausibility of changes between survey years + # from a certain plot. + # Stock of earliest survey year flagged as implausible if not met. - df_i <- df %>% - filter(plot_id == plot_ids[i]) + rule_4 <- paste0("Annual sequestration rate should be ", + "between -5 and 5 t C ha-1 year-1 (~90 % quantile).") - # Individual plot surveys ---- + rule_5 <- paste0("Rel. annual sequestration rate should be ", + "between -10 and 10 % year-1 (99 % quantile).") - for (j in vec_i) { - implausible_reasons_j <- NULL - ## rule_1 ---- - # "Stock should be between 21 and 894 t C ha-1 (99 % quantile)" - if (df$stock[j] < plaus_range_stock[1] || - df$stock[j] > plaus_range_stock[2]) { + ## Subsoil stock ---- - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_1, - implausible_reasons_j) - } + rule_6 <- paste0("Observation depth should be >= 30 cm or ", + ">= 70 % of eff_soil_depth.") - ## rule_2 ---- - # "Organic layer weight should be below 143 kg m-2 (99 % quantile)." - olw_j <- df_layer %>% - filter(plot_id == plot_ids[i]) %>% - filter(survey_form == df$survey_form[j]) %>% - filter(survey_year == df$survey_year[j]) %>% - filter(layer_type == "forest_floor") %>% - group_by(plot_id, profile_id_form, - survey_form, survey_year) %>% - reframe(organic_layer_weight = ifelse( - any(!is.na(organic_layer_weight)), - sum(organic_layer_weight, na.rm = TRUE), - NA_real_)) %>% - filter(!is.na(organic_layer_weight)) %>% - pull(organic_layer_weight) - if (any(olw_j > plaus_range_olw)) { - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_2, - implausible_reasons_j) - } + ## Forest floor stock ---- + # If these rules are not met, the forest floor stock is considered + # implausible, which implies that below-ground stocks (stock_below_ground + # and stock_below_ground_topsoil) can still be used if plaus_total_fscc is + # TRUE. + # FSCC only recommends not to use the stock data from the forest floor + # (stock_forest_floor) and total stock (stock and stock_topsoil), as these + # include forest floor stocks. - ## rule_3 ---- - # "Total organic carbon of forest floor should be - # between 42 and 567 g kg-1 (99 % quantile)." + # First set of rules: plausibility of a values + # within a certain plot x survey_year. + # Stock flagged as implausible if not met. - toc_ff_j <- df_layer %>% - filter(plot_id == plot_ids[i]) %>% - filter(survey_form == df$survey_form[j]) %>% - filter(survey_year == df$survey_year[j]) %>% - filter(layer_type == "forest_floor") %>% - filter(!is.na(parameter_for_stock)) %>% - pull(parameter_for_stock) + rule_7 <- paste0("Plots should have non-OL forest floor layers, ", + "unless their humus form is Mull.") - if (any(toc_ff_j < plaus_range_toc_ff[1]) || - any(toc_ff_j > plaus_range_toc_ff[2])) { + rule_8 <- "Non-OL layers should have a known organic layer weight." - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_3, - implausible_reasons_j) - } + rule_9 <- "Organic layer weight should be below 64 kg m-2 (95 % quantile)." + rule_10 <- paste0("Total organic carbon of forest floor should be ", + "between 42 and 567 g kg-1 (99 % quantile).") - ## rule_4 ---- - # "Max. below-ground total organic carbon should be - # above 3 g kg-1 (99 % quantile)." + # Second set of rules: plausibility of changes between survey years + # from a certain plot. + # Stock of earliest survey year flagged as implausible if not met. - toc_bg_j <- df_layer %>% - filter(plot_id == plot_ids[i]) %>% - filter(survey_form == df$survey_form[j]) %>% - filter(survey_year == df$survey_year[j]) %>% - filter(layer_type != "forest_floor") %>% - group_by(plot_id, profile_id_form, - survey_form, survey_year) %>% - reframe(toc_max = ifelse( - any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), - max(parameter_for_stock[which(layer_type != "forest_floor")], - na.rm = TRUE), - NA_real_)) %>% - filter(!is.na(toc_max)) %>% - pull(toc_max) + rule_11 <- paste0("Change rate of mean forest floor total organic carbon ", + "should be between -25 and 25 g kg-1 year-1 ", + "(~95 % quantile).") - if (any(toc_bg_j < plaus_range_toc_bg)) { - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_4, - implausible_reasons_j) - } - ## rule_5 ---- - # "Carbon density should be below 8 t C ha-1 cm-1 (~95 % quantile)." - plaus_range_c_density + # plausible_fscc + # implausible_fscc_reason - c_dens_j <- df_layer %>% - filter(plot_id == plot_ids[i]) %>% - filter(survey_form == df$survey_form[j]) %>% - filter(survey_year == df$survey_year[j]) %>% - filter(!is.na(density)) %>% - pull(density) + # Evaluate for different plots ---- - if (any(c_dens_j > plaus_range_c_density)) { + plot_ids <- unique(df$plot_id) - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_5, - implausible_reasons_j) - } + # Set progress bar + progress_bar <- txtProgressBar(min = 0, + max = length(plot_ids), style = 3) + for (i in seq_along(plot_ids)) { - ## rule_6 ---- - # "Plots without Mull as humus form should have any non-OL forest floor." + vec_i <- which(df$plot_id == plot_ids[i]) - if ((is.na(df$unknown_forest_floor[j]) || - df$unknown_forest_floor[j] == FALSE) && - (is.na(df$stock_forest_floor[j]) || - df$stock_forest_floor[j] == 0) && - grepl("mull", df$humus_form[j], ignore.case = TRUE)) { + vec_som_i <- which(df$plot_id == plot_ids[i] & + grepl("som", df$survey_form)) - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_6, - implausible_reasons_j) + vec_pfh_i <- which(df$plot_id == plot_ids[i] & + grepl("pfh", df$survey_form)) - } + df_i <- df %>% + filter(plot_id == plot_ids[i]) - if (!identical(implausible_reasons_j, NULL)) { + ## Total/below-ground stock ---- - df$implausible_fscc_reason[j] <- implausible_reasons_j + # If these rules are not met, the total stock or at least the below-ground + # stock is considered implausible, which implies that FSCC recommends not + # to use the stock data from the given plot survey. - } - } # End of j in vec_i + ## Individual plot surveys ---- + for (j in vec_i) { + implausible_reasons_j <- NULL - # Change between surveys for individual plot ---- + ### rule_1 ---- + # "Stock should be between 21 and 894 t C ha-1 (99 % quantile)" - for (vec_selected_i in c("vec_som_i", "vec_pfh_i")) { + if (df$stock[j] < plaus_range_stock[1] || + df$stock[j] > plaus_range_stock[2]) { - vec_sel_i <- get(vec_selected_i) + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_1, + implausible_reasons_j) + } - if (identical(vec_sel_i, integer(0))) { - next - } - if (length(vec_sel_i) <= 1) { - next - } + ### rule_2 ---- + # "Max. below-ground total organic carbon should be + # above 3 g kg-1 (99 % quantile)." + + toc_bg_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type != "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(toc_max = ifelse( + any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), + max(parameter_for_stock[which(layer_type != "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(toc_max)) %>% + pull(toc_max) + + if (any(toc_bg_j < plaus_range_toc_bg)) { - vec_sel_i <- vec_sel_i[order(df$survey_year[vec_sel_i])] + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_2, + implausible_reasons_j) + } - for (j in vec_sel_i) { - implausible_reasons_j <- NULL - # Evaluate based on the earliest survey, - # assuming that this one is overall less reliable + ### rule_3 ---- + # "Carbon density should be below 8 t C ha-1 cm-1 (~95 % quantile)." + + c_dens_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(!is.na(density)) %>% + pull(density) + + if (any(c_dens_j > plaus_range_c_density)) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_3, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + + } # End of j in vec_i + + - if (which(j == vec_sel_i) == length(vec_sel_i)) { + ## Change between surveys for individual plot ---- + + for (vec_selected_i in c("vec_som_i", "vec_pfh_i")) { + + vec_sel_i <- get(vec_selected_i) + + if (identical(vec_sel_i, integer(0))) { + next + } + + if (length(vec_sel_i) <= 1) { next } - # Row index of next survey year to compare with + vec_sel_i <- vec_sel_i[order(df$survey_year[vec_sel_i])] - j_next <- vec_sel_i[which(j == vec_sel_i) + 1] + for (j in vec_sel_i) { + implausible_reasons_j <- NULL - ## rule_7 ---- - # "Annual sequestration rate should be - # between -5 and 5 t C ha-1 year-1 (~90 % quantile)." + # Evaluate based on the earliest survey, + # assuming that this one is overall less reliable - seq_rate_j <- (df$stock[j_next] - df$stock[j]) / - (df$survey_year[j_next] - df$survey_year[j]) + if (which(j == vec_sel_i) == length(vec_sel_i)) { + next + } - if (seq_rate_j < plaus_range_stock_change[1] || - seq_rate_j > plaus_range_stock_change[2]) { + # Row index of next survey year to compare with - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_7, + j_next <- vec_sel_i[which(j == vec_sel_i) + 1] + + + ### rule_4 ---- + # "Annual sequestration rate should be + # between -5 and 5 t C ha-1 year-1 (~90 % quantile)." + + seq_rate_j <- (df$stock[j_next] - df$stock[j]) / + (df$survey_year[j_next] - df$survey_year[j]) + + if (seq_rate_j < plaus_range_stock_change[1] || + seq_rate_j > plaus_range_stock_change[2]) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_4, + implausible_reasons_j) + } + + + + ### rule_5 ---- + # "Rel. annual sequestration rate should be + # between -10 and 10 % year-1 (99 % quantile)." + + seq_rate_rel_j <- ((df$stock[j_next] - df$stock[j]) / + (df$survey_year[j_next] - df$survey_year[j])) / df$stock[j] * 100 + + if (seq_rate_rel_j < plaus_range_stock_change_rel[1] || + seq_rate_rel_j > plaus_range_stock_change_rel[2]) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_5, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } # End of j (with j_next) in vec_i + + } # End of som versus pfh + + + + + + ## Subsoil stock ---- + + for (j in vec_i) { + + implausible_reasons_j <- NULL + + ### rule_6 ---- + # "Observation depth should be >= 30 cm or >= 70 % of eff_soil_depth." + + obs_depth_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type != "forest_floor") + + if ("gapfilled_post_layer1" %in% names(obs_depth_j)) { + + obs_depth_j <- obs_depth_j %>% + filter(is.na(gapfilled_post_layer1) | + gapfilled_post_layer1 == "") + } + + use_stock_topsoil_j <- obs_depth_j %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(obs_depth = max(depth_bottom, na.rm = TRUE), + depth_stock = unique(depth_stock)) %>% + rowwise() %>% + mutate( + use_stock_topsoil = + (.data$obs_depth < 30 & + .data$obs_depth < 0.7 * .data$depth_stock)) %>% + ungroup() %>% + pull(use_stock_topsoil) + + + if (any(use_stock_topsoil_j == TRUE)) { + + df$use_stock_topsoil[j] <- TRUE + implausible_reasons_j <- paste(rule_6, implausible_reasons_j) } - ## rule_8 ---- - # "Rel. annual sequestration rate should be - # between -10 and 10 % year-1 (99 % quantile)." - seq_rate_rel_j <- ((df$stock[j_next] - df$stock[j]) / - (df$survey_year[j_next] - df$survey_year[j])) / df$stock[j] * 100 + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } + + + + + + + ## Forest floor stock ---- + + # If these rules are not met, the forest floor stock is considered + # implausible, which implies that below-ground stocks (stock_below_ground + # and stock_below_ground_topsoil) can still be used if plaus_total_fscc is TRUE. + # FSCC only recommends not to use the stock data from the forest floor + # (stock_forest_floor) and total stock (stock and stock_topsoil), as these + # include forest floor stocks. + + ## Individual plot surveys ---- + + for (j in vec_i) { + + implausible_reasons_j <- NULL + + + ### rule_7 ---- + # "Plots should have non-OL forest floor layers + # unless their humus form is Mull." + + ff_layers_j <- df_layer %>% + # Layers containing "L" are already filtered out + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") + + # No need to further group it per profile and then per plot, because + # any non-OL forest floor layers in whichever profile are already + # sufficient - if (seq_rate_rel_j < plaus_range_stock_change_rel[1] || - seq_rate_rel_j > plaus_range_stock_change_rel[2]) { + if (nrow(ff_layers_j) == 0 && + !grepl("mull", df$humus_form[j], ignore.case = TRUE)) { - df$plausible_fscc[j] <- FALSE - implausible_reasons_j <- paste(rule_8, + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_7, implausible_reasons_j) + } - ## rule_9 ---- - # "Change rate of mean forest floor total organic carbon - # should be between -25 and 25 g kg-1 year-1 (~95 % quantile)." - toc_ff_rate_j <- df_layer %>% + ### rule_8 ---- + # "Non-OL layers should have a known organic layer weight." + + ff_olw_pres_j <- df_layer %>% + # Layers containing "L" are already filtered out filter(plot_id == plot_ids[i]) %>% filter(survey_form == df$survey_form[j]) %>% - filter(survey_year %in% df$survey_year[c(j, j_next)]) %>% + filter(survey_year == df$survey_year[j]) %>% filter(layer_type == "forest_floor") %>% group_by(plot_id, profile_id_form, survey_form, survey_year) %>% - reframe(toc_avg = ifelse( - any(!is.na( - parameter_for_stock[which(layer_type == "forest_floor")])), - weighted.mean( - parameter_for_stock[which(layer_type == "forest_floor")], - w = organic_layer_weight, - na.rm = TRUE), - NA_real_)) %>% - group_by(plot_id, survey_form, survey_year) %>% - reframe(toc_avg = mean(toc_avg, na.rm = TRUE)) %>% - filter(!is.na(toc_avg)) + reframe( + all_pres_olw = all(!is.na(organic_layer_weight))) %>% + ungroup() + + # As soon as one of the profiles has a complete set of OLW data, + # we can estimate a forest floor carbon stock + + if (nrow(ff_olw_pres_j) > 0) { + + if (all(ff_olw_pres_j$all_pres_olw == FALSE)) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_8, + implausible_reasons_j) + + } + } + - if (n_distinct(toc_ff_rate_j$survey_year) == 2) { + ### rule_9 ---- + # "Organic layer weight should be below 64 kg m-2 (95 % quantile)." - toc_ff_rate_j <- toc_ff_rate_j %>% - group_by(plot_id, survey_form) %>% - do(tidy(lm(toc_avg ~ survey_year, data = .))) %>% - filter(term == "survey_year") %>% - pull(estimate) + olw_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(organic_layer_weight = ifelse( + any(!is.na(organic_layer_weight)), + sum(organic_layer_weight, na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(organic_layer_weight)) %>% + pull(organic_layer_weight) - if (toc_ff_rate_j < plaus_range_toc_ff_change[1] || - toc_ff_rate_j > plaus_range_toc_ff_change[2]) { + if (!identical(olw_j, numeric(0))) { - df$plausible_fscc[j] <- FALSE + if (any(olw_j > plaus_range_olw)) { + + df$plaus_forest_floor_fscc[j] <- FALSE implausible_reasons_j <- paste(rule_9, implausible_reasons_j) } } + ### rule_10 ---- + # "Total organic carbon of forest floor should be + # between 42 and 567 g kg-1 (99 % quantile)." + + toc_ff_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") %>% + filter(!is.na(parameter_for_stock)) %>% + pull(parameter_for_stock) + + if (any(toc_ff_j < plaus_range_toc_ff[1]) || + any(toc_ff_j > plaus_range_toc_ff[2])) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_10, + implausible_reasons_j) + } + + + if (!identical(implausible_reasons_j, NULL)) { - if (!is.na(df$implausible_fscc_reason[j])) { + if (!is.na(df$implaus_fscc_reason[j])) { - df$implausible_fscc_reason[j] <- - paste(df$implausible_fscc_reason[j], + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], implausible_reasons_j) } else { - df$implausible_fscc_reason[j] <- implausible_reasons_j + df$implaus_fscc_reason[j] <- implausible_reasons_j } } - } # End of j (with j_next) in vec_i + } # End of j in vec_i + + + + + ## Change between surveys for individual plot ---- + + for (vec_selected_i in c("vec_som_i", "vec_pfh_i")) { + + vec_sel_i <- get(vec_selected_i) + + if (identical(vec_sel_i, integer(0))) { + next + } + + if (length(vec_sel_i) <= 1) { + next + } + + vec_sel_i <- vec_sel_i[order(df$survey_year[vec_sel_i])] + + for (j in vec_sel_i) { + + implausible_reasons_j <- NULL + + # Evaluate based on the earliest survey, + # assuming that this one is overall less reliable + + if (which(j == vec_sel_i) == length(vec_sel_i)) { + next + } + + # Row index of next survey year to compare with + + j_next <- vec_sel_i[which(j == vec_sel_i) + 1] + + + + ### rule_11 ---- + # "Change rate of mean forest floor total organic carbon + # should be between -25 and 25 g kg-1 year-1 (~95 % quantile)." + + toc_ff_rate_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year %in% df$survey_year[c(j, j_next)]) %>% + filter(layer_type == "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + mutate( + organic_layer_weight = ifelse( + any(is.na(organic_layer_weight)), + 1, + organic_layer_weight)) %>% + reframe(toc_avg = ifelse( + any(!is.na( + parameter_for_stock[which(layer_type == "forest_floor")])), + weighted.mean( + parameter_for_stock[which(layer_type == "forest_floor")], + w = organic_layer_weight, + na.rm = TRUE), + NA_real_)) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(toc_avg = mean(toc_avg, na.rm = TRUE)) %>% + filter(!is.na(toc_avg)) + + if (n_distinct(toc_ff_rate_j$survey_year) == 2) { + + toc_ff_rate_j <- toc_ff_rate_j %>% + group_by(plot_id, survey_form) %>% + do(broom::tidy(lm(toc_avg ~ survey_year, data = .))) %>% + filter(term == "survey_year") %>% + pull(estimate) + + if (toc_ff_rate_j < plaus_range_toc_ff_change[1] || + toc_ff_rate_j > plaus_range_toc_ff_change[2]) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_11, + implausible_reasons_j) + } + } + + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } # End of j (with j_next) in vec_i + + } # End of som versus pfh + + + + + # Update progress bar + setTxtProgressBar(progress_bar, i) + + } # End of evaluation plot_ids + + + close(progress_bar) + + + + + } else # End of "if organic_carbon_total" + + + # . ---- + # If not TOC ---- + + { + + + # Retrieve data per profile + + profile_stocks <- get_env(paste0(survey_form_orig, "_profile_", + shorter_var_name, + "_stocks")) + + # Define plausible limits ---- + + if (exists(paste0("plot_", shorter_var_name, "_stocks_summ"))) { + + plaus_range_stock <- + get_env(paste0("plot_", shorter_var_name, "_stocks_summ")) %>% + pull(stock) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) + + plaus_range_stock_change <- + get_env(paste0("plot_", shorter_var_name, "_stocks_summ")) %>% + pull(stock_change) %>% + quantile(c(0.05, 0.95), na.rm = TRUE) %>% + round(1) + + plaus_range_stock_change_rel <- + get_env(paste0("plot_", shorter_var_name, "_stocks_summ")) %>% + pull(stock_change_rel) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) # approximately +-10 % year-1 + } + + if (parameter == "n_total") { + + plaus_range_stock <- c(1, 30) # To correct + plaus_range_stock_change <- c(-0.3, 0.3) # To correct + + } else if (parameter == "extrac_p") { + + plaus_range_stock <- c(143, 8255) # To correct + plaus_range_stock_change <- c(-110, 110) # To correct + + } else { + + warning("Plausible limits for this parameter unknown!") + } + + plaus_range_stock_change_rel <- c(-10, 10) + + + + + if (exists(paste0(so_pre, "layers")) && + exists(paste0(s1_pre, "layers"))) { + + plaus_range_olw <- bind_rows(get_env(paste0(so_pre, "layers")) # , + # get_env(paste0(s1_pre, "layers")) + ) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(organic_layer_weight = + sum(organic_layer_weight, na.rm = TRUE)) %>% + arrange(organic_layer_weight) %>% + pull(organic_layer_weight) %>% + quantile(c(0.975), na.rm = TRUE) %>% + round(1) + + plaus_range_par_ff <- bind_rows(get_env(paste0(so_pre, "layers")) # , + # get_env(paste0(s1_pre, "layers")) + ) %>% + filter(layer_type == "forest_floor") %>% + pull(parameter_for_stock) %>% + quantile(c(0.005, 0.995), na.rm = TRUE) %>% + round(1) + + plaus_range_par_bg <- bind_rows(get_env(paste0(so_pre, "layers")) # , + # get_env(paste0(s1_pre, "layers")) + ) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(toc_max = ifelse( + any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), + max(parameter_for_stock[which(layer_type != "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + pull(toc_max) %>% + quantile(c(0.005), na.rm = TRUE) %>% + round(1) + + plaus_range_par_density <- bind_rows(get_env(paste0(so_pre, "layers")) # , + # get_env(paste0(s1_pre, "layers")) + ) %>% + pull(density) %>% + quantile(c(0.975), na.rm = TRUE) %>% + round(1) + + plaus_range_par_ff_change <- bind_rows(get_env(paste0(so_pre, "layers")) + # , + # get_env(paste0(s1_pre, "layers")) + ) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(par_avg = ifelse( + any(!is.na(parameter_for_stock[which(layer_type == "forest_floor")])), + mean(parameter_for_stock[which(layer_type == "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(par_avg)) %>% + # Calculate slope of linear regression + # (the TOc should not change too drastically) + group_by(plot_id, survey_form) %>% + filter(n() >= 2) %>% # Filter groups with at least two records + do(broom::tidy(lm(par_avg ~ survey_year, data = .))) %>% + filter(term == "survey_year") %>% + select(plot_id, survey_form, estimate) %>% + rename(slope_par_ff = estimate) %>% + pull(slope_par_ff) %>% + quantile(c(0.025, 0.975), na.rm = TRUE) %>% + round(1) + + } + + plaus_range_olw <- 64 + + if (parameter == "n_total") { + + plaus_range_par_ff <- c(3, 24) # To correct + plaus_range_par_bg <- 0.2 # To correct + plaus_range_par_density <- 0.5 # To correct + plaus_range_par_ff_change <- c(-1.5, 1.5) # To correct + + } else if (parameter == "extrac_p") { - } # End of som versus pfh + plaus_range_par_ff <- c(132, 1552) # To correct + plaus_range_par_bg <- 18 # To correct + plaus_range_par_density <- 91 # To correct + plaus_range_par_ff_change <- c(-35, 35) # To correct - # Update progress bar - setTxtProgressBar(progress_bar, i) + } else { - } # End of evaluation plot_ids + warning("Plausible limits for this parameter unknown!") + + } + + + + # Define rules ---- + + ## Total/below-ground stock ---- + + # If these rules are not met, the total stock or at least the below-ground + # stock is considered implausible, which implies that FSCC recommends not + # to use the stock data from the given plot survey. + + # First set of rules: plausibility of a values + # within a certain plot x survey_year. + # Stock flagged as implausible if not met. + + rule_1 <- paste0("Stock should be between ", plaus_range_stock[1], + "and ", plaus_range_stock[2], + " t C ha-1 (99 % quantile).") + + rule_2 <- paste0("Max. below-ground ", parameter, " should be ", + "above ", plaus_range_par_bg, + " g kg-1 (99 % quantile).") + + rule_3 <- paste0(str_to_title(shorter_var_name), + " density should be below ", plaus_range_par_density, + " t C ha-1 cm-1 (~95 % quantile).") + + # Second set of rules: plausibility of changes between survey years + # from a certain plot. + # Stock of earliest survey year flagged as implausible if not met. + + rule_4 <- paste0("Annual sequestration rate should be ", + "between ", plaus_range_stock_change[1], " and ", + plaus_range_stock_change[2], + " t C ha-1 year-1 (~90 % quantile).") + + rule_5 <- paste0("Rel. annual sequestration rate should be ", + "between ", plaus_range_stock_change_rel[1], + " and ", plaus_range_stock_change_rel[2], + " % year-1 (99 % quantile).") + + + + + ## Subsoil stock ---- + + rule_6 <- paste0("Observation depth should be >= 30 cm or ", + ">= 70 % of eff_soil_depth.") + + + + + ## Forest floor stock ---- + + # If these rules are not met, the forest floor stock is considered + # implausible, which implies that below-ground stocks (stock_below_ground + # and stock_below_ground_topsoil) can still be used if plaus_total_fscc is + # TRUE. + # FSCC only recommends not to use the stock data from the forest floor + # (stock_forest_floor) and total stock (stock and stock_topsoil), as these + # include forest floor stocks. + + # First set of rules: plausibility of a values + # within a certain plot x survey_year. + # Stock flagged as implausible if not met. + + rule_7 <- paste0("Plots should have non-OL forest floor layers, ", + "unless their humus form is Mull.") + + rule_8 <- "Non-OL layers should have a known organic layer weight." + + rule_9 <- paste0("Organic layer weight should be below ", plaus_range_olw, + " kg m-2 (95 % quantile).") + + rule_10 <- paste0(str_to_title(parameter), " of forest floor should be ", + "between ", plaus_range_par_ff[1], " and ", + plaus_range_par_ff[2], + " g kg-1 (99 % quantile).") + + # Second set of rules: plausibility of changes between survey years + # from a certain plot. + # Stock of earliest survey year flagged as implausible if not met. + + rule_11 <- paste0("Change rate of mean forest floor ", parameter, " ", + "should be between ", plaus_range_par_ff_change[1], + " and ", plaus_range_par_ff_change[2], " g kg-1 year-1 ", + "(~95 % quantile).") + + + + + + # plausible_fscc + # implausible_fscc_reason + + # Evaluate for different plots ---- + + plot_ids <- unique(df$plot_id) + + # Set progress bar + progress_bar <- txtProgressBar(min = 0, + max = length(plot_ids), style = 3) + + for (i in seq_along(plot_ids)) { + + vec_i <- which(df$plot_id == plot_ids[i]) + + vec_som_i <- which(df$plot_id == plot_ids[i] & + grepl("som", df$survey_form)) + + vec_pfh_i <- which(df$plot_id == plot_ids[i] & + grepl("pfh", df$survey_form)) + + df_i <- df %>% + filter(plot_id == plot_ids[i]) + + ## Total/below-ground stock ---- + + # If these rules are not met, the total stock or at least the below-ground + # stock is considered implausible, which implies that FSCC recommends not + # to use the stock data from the given plot survey. + + + ## Individual plot surveys ---- + + for (j in vec_i) { + + implausible_reasons_j <- NULL + + ### rule_1 ---- + # "Stock should be between 21 and 894 t C ha-1 (99 % quantile)" + + if (df$stock[j] < plaus_range_stock[1] || + df$stock[j] > plaus_range_stock[2]) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_1, + implausible_reasons_j) + } + + + ### rule_2 ---- + # "Max. below-ground total organic carbon should be + # above 3 g kg-1 (99 % quantile)." + + par_bg_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type != "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(toc_max = ifelse( + any(!is.na(parameter_for_stock[which(layer_type != "forest_floor")])), + max(parameter_for_stock[which(layer_type != "forest_floor")], + na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(toc_max)) %>% + pull(toc_max) + + if (any(par_bg_j < plaus_range_par_bg)) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_2, + implausible_reasons_j) + } + + + + ### rule_3 ---- + # "Carbon density should be below 8 t C ha-1 cm-1 (~95 % quantile)." + + par_dens_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(!is.na(density)) %>% + pull(density) + + if (any(par_dens_j > plaus_range_par_density)) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_3, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + + } # End of j in vec_i + + + + ## Change between surveys for individual plot ---- + + for (vec_selected_i in c("vec_som_i", "vec_pfh_i")) { + + vec_sel_i <- get(vec_selected_i) + + if (identical(vec_sel_i, integer(0))) { + next + } + + if (length(vec_sel_i) <= 1) { + next + } + + vec_sel_i <- vec_sel_i[order(df$survey_year[vec_sel_i])] + + for (j in vec_sel_i) { + + implausible_reasons_j <- NULL + + # Evaluate based on the earliest survey, + # assuming that this one is overall less reliable + + if (which(j == vec_sel_i) == length(vec_sel_i)) { + next + } + + # Row index of next survey year to compare with + + j_next <- vec_sel_i[which(j == vec_sel_i) + 1] + + + ### rule_4 ---- + # "Annual sequestration rate should be + # between -5 and 5 t C ha-1 year-1 (~90 % quantile)." + + seq_rate_j <- (df$stock[j_next] - df$stock[j]) / + (df$survey_year[j_next] - df$survey_year[j]) + + if (seq_rate_j < plaus_range_stock_change[1] || + seq_rate_j > plaus_range_stock_change[2]) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_4, + implausible_reasons_j) + } + + + + ### rule_5 ---- + # "Rel. annual sequestration rate should be + # between -10 and 10 % year-1 (99 % quantile)." + + seq_rate_rel_j <- ((df$stock[j_next] - df$stock[j]) / + (df$survey_year[j_next] - df$survey_year[j])) / + df$stock[j] * 100 + + if (seq_rate_rel_j < plaus_range_stock_change_rel[1] || + seq_rate_rel_j > plaus_range_stock_change_rel[2]) { + + df$plaus_total_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_5, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } # End of j (with j_next) in vec_i + + } # End of som versus pfh + + + + + + ## Subsoil stock ---- + + for (j in vec_i) { + + implausible_reasons_j <- NULL + + ### rule_6 ---- + # "Observation depth should be >= 30 cm or >= 70 % of eff_soil_depth." + + obs_depth_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type != "forest_floor") + + if ("gapfilled_post_layer1" %in% names(obs_depth_j)) { + + obs_depth_j <- obs_depth_j %>% + filter(is.na(gapfilled_post_layer1) | + gapfilled_post_layer1 == "") + } + + use_stock_topsoil_j <- obs_depth_j %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(obs_depth = max(depth_bottom, na.rm = TRUE), + depth_stock = unique(depth_stock)) %>% + rowwise() %>% + mutate( + use_stock_topsoil = + (.data$obs_depth < 30 & + .data$obs_depth < 0.7 * .data$depth_stock)) %>% + ungroup() %>% + pull(use_stock_topsoil) + + + if (any(use_stock_topsoil_j == TRUE)) { + + df$use_stock_topsoil[j] <- TRUE + implausible_reasons_j <- paste(rule_6, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } + + + + + + + ## Forest floor stock ---- + + # If these rules are not met, the forest floor stock is considered + # implausible, which implies that below-ground stocks (stock_below_ground + # and stock_below_ground_topsoil) can still be used if plaus_total_fscc is + # TRUE. + # FSCC only recommends not to use the stock data from the forest floor + # (stock_forest_floor) and total stock (stock and stock_topsoil), as these + # include forest floor stocks. + + ## Individual plot surveys ---- + + for (j in vec_i) { + + implausible_reasons_j <- NULL + + + ### rule_7 ---- + # "Plots should have non-OL forest floor layers + # unless their humus form is Mull." + + ff_layers_j <- df_layer %>% + # Layers containing "L" are already filtered out + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") + + # No need to further group it per profile and then per plot, because + # any non-OL forest floor layers in whichever profile are already + # sufficient + + if (nrow(ff_layers_j) == 0 && + !grepl("mull", df$humus_form[j], ignore.case = TRUE)) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_7, + implausible_reasons_j) + + } + + + + ### rule_8 ---- + # "Non-OL layers should have a known organic layer weight." + + ff_olw_pres_j <- df_layer %>% + # Layers containing "L" are already filtered out + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe( + all_pres_olw = all(!is.na(organic_layer_weight))) %>% + ungroup() + + # As soon as one of the profiles has a complete set of OLW data, + # we can estimate a forest floor carbon stock + + if (nrow(ff_olw_pres_j) > 0) { + + if (all(ff_olw_pres_j$all_pres_olw == FALSE)) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_8, + implausible_reasons_j) + + } + } + + + ### rule_9 ---- + # "Organic layer weight should be below 64 kg m-2 (95 % quantile)." + + olw_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + reframe(organic_layer_weight = ifelse( + any(!is.na(organic_layer_weight)), + sum(organic_layer_weight, na.rm = TRUE), + NA_real_)) %>% + filter(!is.na(organic_layer_weight)) %>% + pull(organic_layer_weight) + + if (!identical(olw_j, numeric(0))) { + + if (any(olw_j > plaus_range_olw)) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_9, + implausible_reasons_j) + } + } + + + ### rule_10 ---- + # "Total organic carbon of forest floor should be + # between 42 and 567 g kg-1 (99 % quantile)." + + par_ff_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year == df$survey_year[j]) %>% + filter(layer_type == "forest_floor") %>% + filter(!is.na(parameter_for_stock)) %>% + pull(parameter_for_stock) + + if (any(par_ff_j < plaus_range_par_ff[1]) || + any(par_ff_j > plaus_range_par_ff[2])) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_10, + implausible_reasons_j) + } + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } # End of j in vec_i + + + + + ## Change between surveys for individual plot ---- + + for (vec_selected_i in c("vec_som_i", "vec_pfh_i")) { + + vec_sel_i <- get(vec_selected_i) + + if (identical(vec_sel_i, integer(0))) { + next + } + + if (length(vec_sel_i) <= 1) { + next + } + + vec_sel_i <- vec_sel_i[order(df$survey_year[vec_sel_i])] + + for (j in vec_sel_i) { + + implausible_reasons_j <- NULL + + # Evaluate based on the earliest survey, + # assuming that this one is overall less reliable + + if (which(j == vec_sel_i) == length(vec_sel_i)) { + next + } + + # Row index of next survey year to compare with + + j_next <- vec_sel_i[which(j == vec_sel_i) + 1] + + + + ### rule_11 ---- + # "Change rate of mean forest floor total organic carbon + # should be between -25 and 25 g kg-1 year-1 (~95 % quantile)." + + par_ff_rate_j <- df_layer %>% + filter(plot_id == plot_ids[i]) %>% + filter(survey_form == df$survey_form[j]) %>% + filter(survey_year %in% df$survey_year[c(j, j_next)]) %>% + filter(layer_type == "forest_floor") %>% + group_by(plot_id, profile_id_form, + survey_form, survey_year) %>% + mutate( + organic_layer_weight = ifelse( + any(is.na(organic_layer_weight)), + 1, + organic_layer_weight)) %>% + reframe(par_avg = ifelse( + any(!is.na( + parameter_for_stock[which(layer_type == "forest_floor")])), + weighted.mean( + parameter_for_stock[which(layer_type == "forest_floor")], + w = organic_layer_weight, + na.rm = TRUE), + NA_real_)) %>% + group_by(plot_id, survey_form, survey_year) %>% + reframe(par_avg = mean(par_avg, na.rm = TRUE)) %>% + filter(!is.na(par_avg)) + + if (n_distinct(par_ff_rate_j$survey_year) == 2) { + + par_ff_rate_j <- par_ff_rate_j %>% + group_by(plot_id, survey_form) %>% + do(broom::tidy(lm(par_avg ~ survey_year, data = .))) %>% + filter(term == "survey_year") %>% + pull(estimate) + + if (par_ff_rate_j < plaus_range_par_ff_change[1] || + par_ff_rate_j > plaus_range_par_ff_change[2]) { + + df$plaus_forest_floor_fscc[j] <- FALSE + implausible_reasons_j <- paste(rule_11, + implausible_reasons_j) + } + } + + + + + if (!identical(implausible_reasons_j, NULL)) { + + if (!is.na(df$implaus_fscc_reason[j])) { + + df$implaus_fscc_reason[j] <- + paste(df$implaus_fscc_reason[j], + implausible_reasons_j) + } else { + + df$implaus_fscc_reason[j] <- implausible_reasons_j + + } + } + + } # End of j (with j_next) in vec_i + + } # End of som versus pfh + + + + + # Update progress bar + setTxtProgressBar(progress_bar, i) + + } # End of evaluation plot_ids + + + close(progress_bar) + + + + + } # End of "if not TOC" + + + + + + + + + + # Summarise different filters into clear filters per stock type ---- + + # Current filters: + + # - plaus_total_fscc + # - plaus_forest_floor_fscc + # - use_stock_topsoil + + # - implaus_fscc_reason + + + # New filters: + + # - stock_plaus + # - stock_topsoil_plaus + # - stock_below_ground_plaus + # - stock_below_ground_topsoil_plaus + # - stock_forest_floor_plaus + + # - implaus_fscc_reason + + + df <- df %>% + relocate(use_stock_topsoil, .before = plaus_total_fscc) %>% + mutate( + # stock_plaus + stock_plaus = case_when( + (plaus_total_fscc == TRUE & + plaus_forest_floor_fscc == TRUE & + use_stock_topsoil == FALSE) ~ TRUE, + .default = FALSE), + # stock_topsoil_plaus + stock_topsoil_plaus = case_when( + (plaus_total_fscc == TRUE & + plaus_forest_floor_fscc == TRUE) ~ TRUE, + .default = FALSE), + # stock_below_ground_plaus + stock_below_ground_plaus = case_when( + (plaus_total_fscc == TRUE & + use_stock_topsoil == FALSE) ~ TRUE, + .default = FALSE), + # stock_below_ground_topsoil_plaus + stock_below_ground_topsoil_plaus = case_when( + (plaus_total_fscc == TRUE) ~ TRUE, + .default = FALSE), + # stock_forest_floor_plaus + stock_forest_floor_plaus = case_when( + (plaus_total_fscc == TRUE & + plaus_forest_floor_fscc == TRUE) ~ TRUE, + .default = FALSE)) %>% + relocate(stock_plaus, .after = stock_max) %>% + relocate(stock_topsoil_plaus, .after = stock_topsoil_max) %>% + relocate(stock_below_ground_plaus, .after = stock_below_ground_max) %>% + relocate(stock_below_ground_topsoil_plaus, + .after = stock_below_ground_topsoil_max) %>% + relocate(stock_forest_floor_plaus, .after = stock_forest_floor_max) %>% + select(-plaus_forest_floor_fscc, + -plaus_total_fscc, + -use_stock_topsoil) - close(progress_bar)