Skip to content

Commit

Permalink
support OPGD model
Browse files Browse the repository at this point in the history
  • Loading branch information
SpatLyu committed Jun 6, 2024
1 parent ffc552b commit 20512a4
Show file tree
Hide file tree
Showing 11 changed files with 528 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
.DS_Store
.quarto
docs
inst/doc
13 changes: 12 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,24 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
Imports:
classInt,
dplyr,
knitr,
magrittr,
pander,
parallel,
purrr,
stats,
tibble,
tidyr,
utils
URL: https://spatlyu.github.io/gdverse/
URL: https://spatlyu.github.io/gdverse/, https://github.com/SpatLyu/gdverse
Suggests:
ggspatial,
rmarkdown,
sf,
terra,
tidyterra,
tidyverse,
tmap
VignetteBuilder: knitr
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
export(ecological_detector)
export(factor_detector)
export(gd)
export(gd_bestunidisc)
export(interaction_detector)
export(risk_detector)
export(st_unidisc)
importFrom(classInt,classify_intervals)
importFrom(dplyr,all_of)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
Expand All @@ -17,12 +20,19 @@ importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,slice_max)
importFrom(dplyr,ungroup)
importFrom(knitr,kable)
importFrom(magrittr,`%>%`)
importFrom(pander,pander)
importFrom(parallel,clusterExport)
importFrom(parallel,makeCluster)
importFrom(parallel,parLapply)
importFrom(parallel,stopCluster)
importFrom(purrr,map2_dfr)
importFrom(purrr,map_dfr)
importFrom(purrr,pmap_dfc)
importFrom(purrr,set_names)
importFrom(stats,as.formula)
importFrom(stats,pf)
importFrom(stats,t.test)
Expand Down
134 changes: 134 additions & 0 deletions R/discretization.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#' @title Univariate discretization
#' @author Wenbo Lv \email{[email protected]}
#' @description
#' Function to classify univariate vector to interval,a wrapper of `classInt::classify_intervals()`.
#'
#' @param x A continuous numerical variable.
#' @param k (optional) Number of classes required, if missing, `grDevices::nclass.Sturges()` is used;
#' see also the "dpih" and "headtails" styles for automatic choice of the number of classes.
#' @param method Chosen classify style: one of "fixed", "sd", "equal", "pretty", "quantile", "kmeans",
#' "hclust", "bclust", "fisher", "jenks", "dpih", "headtails", "maximum", or "box".Default is `quantile`.
#' @param factor (optional) Default is `FALSE`, if `TRUE` returns cols as a factor with intervals as
#' labels rather than integers.
#' @param ... (optional) Other arguments passed to `classInt::classify_intervals()`,
#' see `?classInt::classify_intervals()`.
#'
#' @return A discrete vectors after being classified.
#' @importFrom classInt classify_intervals
#' @export
#'
#' @examples
#' xvar = c(22361, 9573, 4836, 5309, 10384, 4359, 11016, 4414, 3327, 3408,
#' 17816, 6909, 6936, 7990, 3758, 3569, 21965, 3605, 2181, 1892,
#' 2459, 2934, 6399, 8578, 8537, 4840, 12132, 3734, 4372, 9073,
#' 7508, 5203)
#' st_unidisc(xvar,k = 6,method = 'sd')
st_unidisc = \(x,k,method = "quantile",factor = FALSE,...){
return(classInt::classify_intervals(var = x,n = k,style = method,
...,factor = factor))
}

#' @title Best univariate discretize parameters based on geodetector q-statistic
#' @author Wenbo Lv \email{[email protected]}
#' @description
#' Function for determining the best univariate discretize parameters based on geodetector q-statistic.
#'
#' @param formula A formula of spatial stratified heterogeneity test.
#' @param data A data.frame or tibble of observation data.
#' @param discnum A vector of number of classes for discretization.
#' @param discmethod (optional) A vector of methods for discretization,default is used
#' `c("sd","equal","pretty","quantile","fisher","headtails","maximum","box")`in `spEcula`.
#' @param cores positive integer(default is 1). If cores > 1, a 'parallel' package
#' cluster with that many cores is created and used. You can also supply a cluster
#' object.
#' @param return_disc (optional) Whether or not return discretized result used the optimal parameter.
#' Default is `TRUE`.
#' @param ... (optional) Other arguments passed to `st_unidisc()`.
#'
#' @return A list with the optimal parameter in the provided parameter combination with `k`,
#' `method` and `disc`(when `return_disc` is `TRUE`).
#' @importFrom stats as.formula
#' @importFrom parallel makeCluster stopCluster clusterExport parLapply
#' @importFrom tidyr crossing
#' @importFrom magrittr `%>%`
#' @importFrom tibble as_tibble
#' @importFrom purrr map_dfr pmap_dfc set_names
#' @importFrom dplyr bind_cols slice_max group_by ungroup select
#' @export
#'
#' @examples
#' \dontrun{
#' library(terra)
#' library(tidyverse)
#' fvcpath = system.file("extdata", "FVC.zip",package = 'spEcula')
#' fvc = terra::rast(paste0("/vsizip/",fvcpath))
#' fvc = as_tibble(terra::as.data.frame(fvc,na.rm = T))
#' g = gd_bestunidisc(fvc ~ .,data = select(fvc,-lulc),discnum = 2:15,cores = 6)
#' new.fvc = g$disv
#' new.fvc = bind_cols(select(fvc,fvc,lulc),new.fvc)
#' ssh.test(fvc ~ .,data = new.fvc,type = 'factor')
#' ssh.test(fvc ~ .,data = new.fvc,type = 'interaction')
#' }
gd_bestunidisc = \(formula,data,discnum,discmethod = NULL,
cores = 1,return_disc = TRUE,...){
doclust = FALSE
if (inherits(cores, "cluster")) {
doclust = TRUE
} else if (cores > 1) {
doclust = TRUE
cores = parallel::makeCluster(cores)
on.exit(parallel::stopCluster(cores), add=TRUE)
}
if (is.null(discmethod)) {
discmethod = c("sd","equal","pretty","quantile","fisher","headtails","maximum","box")
}

formula = stats::as.formula(formula)
formula.vars = all.vars(formula)
response = data[, formula.vars[1], drop = TRUE]
if (formula.vars[2] == "."){
explanatory = data[,-which(colnames(data) == formula.vars[1])]
} else {
explanatory = subset(data, TRUE, match(formula.vars[-1], colnames(data)))
}

discname = names(explanatory)
paradf = tidyr::crossing("x" = discname,
"k" = discnum,
"method" = discmethod)
parak = split(paradf, seq_len(nrow(paradf)))

calcul_disc = \(paramgd){
xdisc = st_unidisc(explanatory[,paramgd[[1]],drop = TRUE],
k = paramgd[[2]],method = paramgd[[3]],...)
fd = factor_detector(response,xdisc)
q = fd[[1]]
names(q) = "qstatistic"
return(q)
}

if (doclust) {
parallel::clusterExport(cores,c('st_unidisc','factor_detector'))
out_g = parallel::parLapply(cores,parak,calcul_disc)
out_g = tibble::as_tibble(do.call(rbind, out_g))
} else {
out_g = purrr::map_dfr(parak,calcul_disc)
}

out_g = dplyr::bind_cols(paradf,out_g) %>%
dplyr::group_by(x) %>%
dplyr::slice_max(order_by = qstatistic,
with_ties = FALSE) %>%
dplyr::ungroup() %>%
dplyr::select(-qstatistic) %>%
as.list()

if(return_disc){
resdisc = purrr::pmap_dfc(out_g,
\(x,k,method) st_unidisc(x = explanatory[,x,drop = TRUE],
k = k,method = method,...)) %>%
purrr::set_names(out_g[[1]])
out_g = append(out_g,list("disv" = resdisc))
}
return(out_g)
}
Binary file added man/figures/gd_flowchart.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
59 changes: 59 additions & 0 deletions man/gd_bestunidisc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/st_unidisc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vignettes/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
Loading

0 comments on commit 20512a4

Please sign in to comment.