From a64f606ead4af7771f4cc7587ddeaae8e76b68b3 Mon Sep 17 00:00:00 2001
From: gilbertocamara <gilberto.camara@inpe.br>
Date: Fri, 24 Jan 2025 09:14:10 -0300
Subject: [PATCH] closes #1260

---
 DESCRIPTION                                   |   8 +-
 NAMESPACE                                     |   2 +
 R/sits_plot.R                                 | 102 ++++++++++++++++++
 R/sits_smooth.R                               |  13 +++
 R/sits_som.R                                  |   4 +
 inst/extdata/config_messages.yml              |   2 +
 inst/extdata/scripts/plot_som_clean_samples.R |   1 +
 man/plot.som_clean_samples.Rd                 |  36 +++++++
 man/sits-package.Rd                           |   1 +
 man/sits_smooth.Rd                            |  13 +++
 man/sits_view.Rd                              |   2 +-
 sits.Rproj                                    |   1 +
 12 files changed, 182 insertions(+), 3 deletions(-)
 create mode 100644 man/plot.som_clean_samples.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 00e2ad1f9..8ca5b5461 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,6 +11,7 @@ Authors@R: c(person('Rolf', 'Simoes', role = c('aut'), email = 'rolf.simoes@inpe
              person('Charlotte', 'Pelletier', role = c('ctb'), email = 'charlotte.pelletier@univ-ubs.fr'),
              person('Pedro', 'Andrade', role = c('ctb'), email = 'pedro.andrade@inpe.br'),
              person('Alber', 'Sanchez', role = c('ctb'), email = 'alber.ipia@inpe.br'),
+             person('Estefania', 'Pizarro', role = c('ctb'), email = 'eapizarroa@ine.gob.cl'),
              person('Gilberto', 'Queiroz', role = c('ctb'), email = 'gilberto.queiroz@inpe.br')
              )
 Maintainer: Gilberto Camara <gilberto.camara.inpe@gmail.com>
@@ -26,15 +27,18 @@ Description: An end-to-end toolkit for land use and land cover classification
     smoothing filters for dealing with noisy time series.
     Includes functions for quality assessment of training samples using self-organized maps 
     as presented by Santos et al (2021) <doi:10.1016/j.isprsjprs.2021.04.014>. 
+    Includes methods to reduce training samples imbalance proposed by 
+    Chawla et al (2002) <doi:10.1613/jair.953>.
     Provides machine learning methods including support vector machines, 
     random forests, extreme gradient boosting, multi-layer perceptrons,
-    temporal convolutional neural networks proposed by Pelletier et al (2019) <doi:10.3390/rs11050523>, 
+    temporal convolutional neural networks proposed 
+    by Pelletier et al (2019) <doi:10.3390/rs11050523>, 
     and temporal attention encoders by Garnot and Landrieu (2020) <doi:10.48550/arXiv.2007.00586>.
     Supports GPU processing of deep learning models using torch <https://torch.mlverse.org/>.
     Performs efficient classification of big Earth observation data cubes and includes 
     functions for post-classification smoothing based on Bayesian inference 
     as described by Camara et al (2024) <doi:10.3390/rs16234572>, and 
-    methods for active learning and uncertainty assessment. Supports object-based 
+    methods for active learning and uncertainty assessment. Supports region-based 
     time series analysis using package supercells <https://jakubnowosad.com/supercells/>.
     Enables best practices for estimating area and assessing accuracy of land change as 
     recommended by Olofsson et al (2014) <doi:10.1016/j.rse.2014.02.015>.
diff --git a/NAMESPACE b/NAMESPACE
index d63f82971..4b30d2c28 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -354,6 +354,7 @@ S3method(plot,sar_cube)
 S3method(plot,sits)
 S3method(plot,sits_accuracy)
 S3method(plot,sits_cluster)
+S3method(plot,som_clean_samples)
 S3method(plot,som_evaluate_cluster)
 S3method(plot,som_map)
 S3method(plot,torch_model)
@@ -465,6 +466,7 @@ S3method(sits_select,sits)
 S3method(sits_smooth,default)
 S3method(sits_smooth,derived_cube)
 S3method(sits_smooth,probs_cube)
+S3method(sits_smooth,probs_vector_cube)
 S3method(sits_smooth,raster_cube)
 S3method(sits_timeline,default)
 S3method(sits_timeline,derived_cube)
diff --git a/R/sits_plot.R b/R/sits_plot.R
index 7a2f657b7..f37aaebb9 100644
--- a/R/sits_plot.R
+++ b/R/sits_plot.R
@@ -1802,6 +1802,108 @@ plot.som_map <- function(x, y, ..., type = "codes", band = 1) {
     )
     return(invisible(x))
 }
+#' @title  Plot SOM samples evaluated
+#' @name   plot.som_clean_samples
+#' @author Estefania Pizarro, \email{eapizarroa@@ine.gob.cl}
+#'
+#' @description It is useful to visualise the
+#' output of the SOM evaluation, which classifies the samples as
+#' "clean" (good samples), "remove" (possible outliers),
+#' and "analyse" (borderline cases). This function plots the
+#' percentual distribution of the SOM evaluation per class.
+#' To use it, please run \code{sits_som_clean_samples} using
+#' the parameter "keep" as "c("clean", "analyze", "remove").
+#'
+#'
+#' @param  x        Object of class "som_clean_samples".
+#'
+#' @return          Called for side effects.
+#'
+#' #' @examples
+#' if (sits_run_examples()) {
+#'     # create a SOM map
+#'     som_map <- sits_som_map(samples_modis_ndvi)
+#'     # plot the SOM map
+#'     eval <- sits_som_clean_samples(som_map,
+#'             keep = c("clean", "analyze", "remove"))
+#'     plot(eval)
+#' }
+#' @export
+plot.som_clean_samples <- function(x, ...) {
+    .check_set_caller(".plot_som_clean_samples")
+
+    # retrieve the evaluation labels
+    eval_labels <- unique(x[["eval"]])
+    # check if all eval labels are available
+    all_evals <- all(c("clean", "analyze", "remove")
+                     %in% eval_labels)
+    if (!all_evals)
+        warning(.conf("messages", ".plot_som_clean_samples"))
+    # organize the evaluation by class and percentage
+    eval <- x |>
+        dplyr::group_by(label, eval) |>
+        dplyr::summarise(n = dplyr::n()) |>
+        dplyr::mutate(n_class  = sum(n)) |>
+        dplyr::ungroup() |>
+        dplyr::mutate(percentage = (n/n_class)*100) |>
+        dplyr::select(label, eval, percentage) |>
+        tidyr::pivot_wider(names_from = eval, values_from = percentage)
+
+    colors_eval <- c("#C7BB3A", "#4FC78E", "#D98880")
+    if (all_evals) {
+        eval <- eval |>
+            dplyr::select(label, clean, remove, analyze) |>
+            tidyr::replace_na(list(clean = 0, remove = 0, analyze = 0))
+        pivot <- tidyr::pivot_longer(eval, cols = c(clean, remove, analyze),
+                                     names_to = "Eval", values_to = "value")
+    } else {
+        eval <- eval |>
+            dplyr::select(label, clean, analyze) |>
+            tidyr::replace_na(list(clean = 0, analyze = 0))
+        pivot <- tidyr::pivot_longer(eval, cols = c(clean, analyze),
+                                     names_to = "Eval", values_to = "value")
+        colors_eval <- c("#C7BB3A", "#4FC78E")
+    }
+
+    labels <- unique(pivot[["label"]])
+    pivot$label <- factor(pivot$label, levels = labels)
+
+    # Stacked bar graphs for Noise Detection
+    g <- ggplot2::ggplot(
+        pivot,
+        ggplot2::aes(
+            x = value,
+            y = factor(label, levels = rev(levels(label))),
+            fill = Eval)) +
+        ggplot2::geom_bar(
+            stat = "identity",
+            color = "white",
+            width = 0.9) +
+        ggplot2::geom_text(
+            ggplot2::aes(
+                label = scales::percent(value/100, 1)),
+            position = ggplot2::position_stack(vjust = 0.5),
+            color = "black",
+            size = length(eval_labels),
+            fontface = "bold",
+            check_overlap = TRUE) +
+        ggplot2::theme_classic() +
+        ggplot2::theme(
+            axis.title.y = ggplot2::element_blank(),
+            legend.title = ggplot2::element_text(size = 11),
+            legend.text = ggplot2::element_text(size = 9),
+            legend.key.size = ggplot2::unit(0.5, "cm"),
+            legend.spacing.y = ggplot2::unit(0.5, "cm"),
+            legend.position = "right",
+            legend.justification = "center") +
+        ggplot2::xlab("%") +
+        ggplot2::scale_fill_manual(
+            values = colors_eval,
+            name = "Evaluation") +
+        ggplot2::ggtitle("Class noise detection")
+
+    return(g)
+}
 #' @title  Plot XGB model
 #' @name   plot.xgb_model
 #' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
diff --git a/R/sits_smooth.R b/R/sits_smooth.R
index 3afd49f9c..66f79263e 100644
--- a/R/sits_smooth.R
+++ b/R/sits_smooth.R
@@ -161,6 +161,19 @@ sits_smooth.probs_cube <- function(cube,
 }
 #' @rdname sits_smooth
 #' @export
+sits_smooth.probs_vector_cube <- function(cube,
+                                    window_size = 7L,
+                                    neigh_fraction = 0.5,
+                                    smoothness = 10L,
+                                    exclusion_mask = NULL,
+                                    memsize = 4L,
+                                    multicores = 2L,
+                                    output_dir,
+                                    version = "v1") {
+    stop(.conf("messages", "sits_probs_vector_cube"))
+}
+#' @rdname sits_smooth
+#' @export
 sits_smooth.raster_cube <- function(cube,
                                     window_size = 7L,
                                     neigh_fraction = 0.5,
diff --git a/R/sits_som.R b/R/sits_som.R
index 259d7a038..da4384326 100644
--- a/R/sits_som.R
+++ b/R/sits_som.R
@@ -285,6 +285,10 @@ sits_som_clean_samples <- function(som_map,
             -"prior_prob"
         ) |>
         dplyr::filter(.data[["eval"]] %in% keep)
+
+    # include class for plotting
+    class(data) <- c("som_clean_samples", class(data))
+
     return(data)
 }
 
diff --git a/inst/extdata/config_messages.yml b/inst/extdata/config_messages.yml
index ce191645b..bccf76d0f 100644
--- a/inst/extdata/config_messages.yml
+++ b/inst/extdata/config_messages.yml
@@ -210,6 +210,7 @@
 .plot_sits: "wrong input parameters - see example in documentation"
 .plot_sits_accuracy: "unable to plot - please run sits_accuracy"
 .plot_sits_cluster: "missing cluster object - run 'sits_cluster' and use result as input\n check examples in documentation"
+.plot_som_clean_samples: "please re-run sits_som_clean_samples with keep parameter \n equal to c('clean', 'analyze', 'remove') before ploting the result"
 .plot_som_evaluate_cluster: "unable to plot - please run sits_som_evaluate_cluster"
 .plot_som_map: "wrong input data; please run sits_som_map first"
 .plot_torch_model: "invalid model - please run sits_train() using a torch model"
@@ -424,6 +425,7 @@ sits_mosaic: "wrong input parameters - see example in documentation"
 sits_patterns: "invalid samples data set"
 sits_predictors: "invalid samples data set"
 sits_pred_normalize: "invalid input - use results of 'sits_stats' and 'sits_predictors' as input"
+sits_probs_vector_cube: "smoothing not required for vector probability cube \n run sits_label_classification directly"
 sits_reclassify: "check that cube and mask are valid classified data cubes"
 sits_reclassify_mask_intersect: "mask roi does not intersect cube"
 sits_regularize: "check input parameters include a valid cube, resolution, period and output_dir"
diff --git a/inst/extdata/scripts/plot_som_clean_samples.R b/inst/extdata/scripts/plot_som_clean_samples.R
index 0e671e693..e692574f1 100644
--- a/inst/extdata/scripts/plot_som_clean_samples.R
+++ b/inst/extdata/scripts/plot_som_clean_samples.R
@@ -24,6 +24,7 @@ plot_eval <- function(eval){
         dplyr::select(label, clean, remove, analyze) |>
         tidyr::replace_na(list(clean = 0, remove = 0, analyze = 0))
 
+
     pivot <- tidyr::pivot_longer(eval, cols = c(clean, remove, analyze),
                           names_to = "Eval", values_to = "value")
     labels <- unique(pivot[["label"]])
diff --git a/man/plot.som_clean_samples.Rd b/man/plot.som_clean_samples.Rd
new file mode 100644
index 000000000..09500f1ba
--- /dev/null
+++ b/man/plot.som_clean_samples.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sits_plot.R
+\name{plot.som_clean_samples}
+\alias{plot.som_clean_samples}
+\title{Plot SOM samples evaluated}
+\usage{
+\method{plot}{som_clean_samples}(x, ...)
+}
+\arguments{
+\item{x}{Object of class "som_clean_samples".}
+}
+\value{
+Called for side effects.
+
+#' @examples
+if (sits_run_examples()) {
+    # create a SOM map
+    som_map <- sits_som_map(samples_modis_ndvi)
+    # plot the SOM map
+    eval <- sits_som_clean_samples(som_map,
+            keep = c("clean", "analyze", "remove"))
+    plot(eval)
+}
+}
+\description{
+It is useful to visualise the
+output of the SOM evaluation, which classifies the samples as
+"clean" (good samples), "remove" (possible outliers),
+and "analyse" (borderline cases). This function plots the
+percentual distribution of the SOM evaluation per class.
+To use it, please run \code{sits_som_clean_samples} using
+the parameter "keep" as "c("clean", "analyze", "remove").
+}
+\author{
+Estefania Pizarro, \email{eapizarroa@ine.gob.cl}
+}
diff --git a/man/sits-package.Rd b/man/sits-package.Rd
index 1b768d175..3cdcd8a68 100644
--- a/man/sits-package.Rd
+++ b/man/sits-package.Rd
@@ -44,6 +44,7 @@ Other contributors:
   \item Charlotte Pelletier \email{charlotte.pelletier@univ-ubs.fr} [contributor]
   \item Pedro Andrade \email{pedro.andrade@inpe.br} [contributor]
   \item Alber Sanchez \email{alber.ipia@inpe.br} [contributor]
+  \item Estefania Pizarro \email{eapizarroa@ine.gob.cl} [contributor]
   \item Gilberto Queiroz \email{gilberto.queiroz@inpe.br} [contributor]
 }
 
diff --git a/man/sits_smooth.Rd b/man/sits_smooth.Rd
index e6d933bff..e16c8230f 100644
--- a/man/sits_smooth.Rd
+++ b/man/sits_smooth.Rd
@@ -3,6 +3,7 @@
 \name{sits_smooth}
 \alias{sits_smooth}
 \alias{sits_smooth.probs_cube}
+\alias{sits_smooth.probs_vector_cube}
 \alias{sits_smooth.raster_cube}
 \alias{sits_smooth.derived_cube}
 \alias{sits_smooth.default}
@@ -32,6 +33,18 @@ sits_smooth(
   version = "v1"
 )
 
+\method{sits_smooth}{probs_vector_cube}(
+  cube,
+  window_size = 7L,
+  neigh_fraction = 0.5,
+  smoothness = 10L,
+  exclusion_mask = NULL,
+  memsize = 4L,
+  multicores = 2L,
+  output_dir,
+  version = "v1"
+)
+
 \method{sits_smooth}{raster_cube}(
   cube,
   window_size = 7L,
diff --git a/man/sits_view.Rd b/man/sits_view.Rd
index 77854da8e..310b167a1 100644
--- a/man/sits_view.Rd
+++ b/man/sits_view.Rd
@@ -45,7 +45,7 @@ sits_view(x, ...)
   max_cog_size = 2048,
   first_quantile = 0.02,
   last_quantile = 0.98,
-  leaflet_megabytes = 32,
+  leaflet_megabytes = 64,
   add = FALSE
 )
 
diff --git a/sits.Rproj b/sits.Rproj
index c1d6889aa..108634675 100644
--- a/sits.Rproj
+++ b/sits.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: 2f1606ae-8610-45aa-99ce-edaa30c043fc
 
 RestoreWorkspace: Default
 SaveWorkspace: Ask