From 0517572512df33e23d3dc4bfb085f1e6e01757d7 Mon Sep 17 00:00:00 2001 From: MLopez-Ibanez <2620021+MLopez-Ibanez@users.noreply.github.com> Date: Fri, 1 Mar 2024 00:28:00 +0000 Subject: [PATCH] Implement Sobol initialization. --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 4 +- R/configurations.R | 1 - R/generation.R | 142 +++++++++++++++++++-- R/irace-package.R | 1 + R/irace.R | 4 +- R/utils.R | 8 ++ tests/testthat/test-targetRunnerParallel.R | 2 +- 9 files changed, 145 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 23d43cdc..819ed7c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,6 +27,7 @@ Imports: data.table (>= 1.15.0), matrixStats, R6, + spacefillr, withr Suggests: Rmpi (>= 0.6.0), diff --git a/NAMESPACE b/NAMESPACE index 7b1174cf..a29415fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -64,3 +64,4 @@ importFrom(graphics,par) importFrom(graphics,plot) importFrom(graphics,points) importFrom(graphics,strwidth) +importFrom(spacefillr,generate_sobol_set) diff --git a/NEWS.md b/NEWS.md index 816099aa..0644f4d1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -62,9 +62,11 @@ ## New features and improvements * `sampleUniform()` and `sampleModel()` are significantly faster thanks to using [`data.table`](https://r-datatable.com). + + * Initial configurations are sampled using Sobol low-discrepancy sequences using `spacefillr::generate_sobol_set()`. This should provide a better initial distribution of parameter values. * Ablation will report configurations that produced the same results, which - points to parameter values that have the same effect on the target algorithm, + indicates parameter values that have the same effect on the target algorithm, possibly indicating a bug in the target algorithm. * New option `instancesFile` of `ablation()` for using either the training diff --git a/R/configurations.R b/R/configurations.R index 4ebae742..d5448531 100644 --- a/R/configurations.R +++ b/R/configurations.R @@ -2,7 +2,6 @@ configurations_alloc <- function(colnames, nrow, parameters) { parameter_type <- function(type) { - stopifnot(type %in% c("i","r","o","c")) switch(type, i = NA_integer_, r = NA_real_, diff --git a/R/generation.R b/R/generation.R index 7536053a..9fbf564e 100644 --- a/R/generation.R +++ b/R/generation.R @@ -24,6 +24,17 @@ get_fixed_value <- function(parameters, param) parameters$domain[[param]][[1L]] } +repairConfigurations <- function(x, parameters, repair) +{ + if (!is.null(repair)) { + # FIXME: Pass the whole newConfigurations to repair and let it handle each row. + j <- colnames(x) + for (i in seq_nrow(x)) + set(x, i, j = j, value = repair(as.data.frame(x[i]), parameters)) + } + x +} + ## Calculates the parameter bounds when parameters domain is dependent getDependentBound <- function(parameters, param, configuration) { @@ -48,7 +59,6 @@ getDependentBound <- function(parameters, param, configuration) "). This is NOT a bug in irace. Check the definition of these parameters.") } } - values } @@ -81,6 +91,119 @@ get_dependent_domain <- function(parameters, param, configuration) domain } +param_qunif_c <- function(x, domain, transf, digits = NULL) +{ + n <- length(domain) + z <- integer_round(as.numeric(x) * n + 1L, 1L, n) + domain[z] +} + +param_qunif_i <- function(x, domain, transf, digits = NULL) +{ + upper <- domain[2L] + lower <- domain[1L] + x <- as.numeric(x) + if (transf == "log") { + # +1 for correct rounding before floor() + x <- transform_from_log(x, transf, lower, upper + 1L) + } else { + x <- x * (upper + 1L - lower) + lower + } + integer_round(x, lower, upper) +} + +param_qunif_r <- function(x, domain, transf, digits) +{ + upper <- domain[2L] + lower <- domain[1L] + x <- as.numeric(x) + if (transf == "log") { + return(round(transform_from_log(x, transf, lower, upper), digits)) + } + x <- x * (upper - lower) + lower + clamp(round(x, digits), lower, upper) +} + +.param_qunif <- c(i = param_qunif_i, r = param_qunif_r, c = param_qunif_c, + o = param_qunif_c) + +generate_sobol <- function(parameters, n, repair = NULL) +{ + # Do not use .Machine$integer.max to minimize differences between machines. + seed <- sample.int(2147483647L, size = 1L) + confs <- spacefillr::generate_sobol_set(n, dim = parameters$nbVariable, seed = seed) + confs <- data.table(confs) + setnames(confs, parameters$names[!parameters$isFixed]) + nodep_names <- parameters$names[!parameters$isDependent & !parameters$isFixed] + # FIXME: How to do this faster using data.table? + for (x in nodep_names) { + this_qunif <- .param_qunif[[parameters$types[x]]] + set(confs, j = x, value = this_qunif(confs[[x]], parameters$domain[[x]], parameters$transform[[x]], + digits = parameters$digits[x])) + } + for (x in parameters$names[parameters$isFixed]) { + set(confs, j = x, value = parameters$domain[[x]][[1L]]) + } + setcolorder(confs, parameters$names) + + hierarchy <- parameters$hierarchy + max_level <- max(hierarchy) + if (max_level > 1L) { + .NEWVALUE <- .DOMAIN <- NULL # To silence CRAN warnings. + for (k in seq_len(max_level - 1L)) { + prev_names <- names(hierarchy)[hierarchy <= k] + dep_names <- names(hierarchy)[hierarchy == k+1L] + for (p in dep_names) { + idx_satisfied <- which_satisfied(confs, parameters$conditions[[p]]) + if (parameters$isDependent[[p]] && length(idx_satisfied) > 0L) { + this_qunif <- .param_qunif[[parameters$types[p]]] + confs[idx_satisfied, let(.DOMAIN = list(get_dependent_domain(parameters, p, .SD))), by=.I, .SDcols=prev_names] + confs[idx_satisfied, .NEWVALUE := this_qunif(.SD, unlist(.DOMAIN), parameters$transform[[p]], digits = parameters$digits[p]), by=.I, .SDcols=p] + confs[, (p):=.NEWVALUE] + confs[, let(.NEWVALUE=NULL, .DOMAIN=NULL)] + } else if (length(idx_satisfied) < n) { + idx_not_satisfied <- if (length(idx_satisfied)) + seq_len(n)[-idx_satisfied] else NULL + na_value <- switch(parameters$types[p], + i = NA_integer_, + r = NA_real_, + c = NA_character_, + o = NA_character_, + irace.internal.error("Unknown type '", parameters$types[p], "'")) + set(confs, i = idx_not_satisfied, j = p, value = na_value) + } + } + } + } + repairConfigurations(confs, parameters, repair) + set(confs, j = ".PARENT.", value = NA_integer_) + confs +} + +sampleSobol <- function(parameters, n, repair = NULL) +{ + newConfigurations <- generate_sobol(parameters, n, repair) + newConfigurations <- unique(newConfigurations) + forbidden <- parameters$forbidden + newConfigurations <- filter_forbidden(newConfigurations, forbidden) + have <- nrow(newConfigurations) + if (have == n) { + setDF(newConfigurations) + return(newConfigurations) + } + needed <- max(ceiling(n + (n - have) * (2 - have / n)), min(parameters$nbVariable * 5L, 100L)) + newConfigurations <- generate_sobol(parameters, needed, repair) + newConfigurations <- unique(newConfigurations) + newConfigurations <- filter_forbidden(newConfigurations, forbidden) + if (nrow(newConfigurations) == 0L) { + irace.error("irace tried to sample a configuration not forbidden without success, perhaps your constraints are too strict?") + } + newConfigurations <- truncate_rows(newConfigurations, n) + setDF(newConfigurations) + newConfigurations +} + + generate_uniform <- function(parameters, nbConfigurations, repair = NULL) { newConfigurations <- configurations_alloc(parameters$names, nrow = nbConfigurations, parameters = parameters) @@ -128,16 +251,12 @@ generate_uniform <- function(parameters, nbConfigurations, repair = NULL) } set(newConfigurations, i = idx, j = currentParameter, value = newVals) } - if (!is.null(repair)) { - # FIXME: Pass the whole newConfigurations to repair and let it handle each row. - j <- colnames(newConfigurations) - for (i in seq_len(nbConfigurations)) - set(newConfigurations, i, j = j, value = repair(as.data.frame(newConfigurations[i]), parameters)) - } + repairConfigurations(newConfigurations, parameters, repair) set(newConfigurations, j = ".PARENT.", value = NA_integer_) newConfigurations } + ### Uniform sampling for the initial generation sampleUniform <- function(parameters, nbConfigurations, repair = NULL) { @@ -253,12 +372,7 @@ sample_from_model <- function(parameters, eliteConfigurations, model, # .BY is a list, so take the first argument. newConfigurations[idx_satisfied, c(currentParameter) := list(sample_values(.N, .BY[[1L]])), by = idx_elites[idx_satisfied]] } - if (!is.null(repair)) { - # FIXME: Pass the whole newConfigurations to repair and let it handle each row. - j <- colnames(newConfigurations) - for (i in seq_len(nbNewConfigurations)) - set(newConfigurations, i, j = j, value = repair(as.data.frame(newConfigurations[i]), parameters)) - } + repairConfigurations(newConfigurations, parameters, repair) set(newConfigurations, j = ".PARENT.", value = ids_elites[idx_elites]) newConfigurations } @@ -357,7 +471,7 @@ integer_round <- function(x, lower, upper) # The probability of this happening is very small, but it happens. x <- pmin.int(upper, x) irace.assert(all(x >= lower)) - x + as.integer(x) } sample_numeric_unif <- function(n, lower, upper, transf) diff --git a/R/irace-package.R b/R/irace-package.R index 96ef1a0d..d694bcc3 100644 --- a/R/irace-package.R +++ b/R/irace-package.R @@ -7,6 +7,7 @@ #' @importFrom R6 R6Class #' @importFrom grDevices dev.off pdf #' @importFrom graphics abline axis boxplot par plot points strwidth bxp grid +#' @importFrom spacefillr generate_sobol_set #' #' @details License: GPL (>= 2) #' diff --git a/R/irace.R b/R/irace.R index f6473104..27708b94 100644 --- a/R/irace.R +++ b/R/irace.R @@ -825,7 +825,7 @@ irace_run <- function(scenario, parameters) repeat { # Sample new configurations if needed if (nrow(allConfigurations) < nconfigurations) { - newConfigurations <- sampleUniform(parameters, + newConfigurations <- sampleSobol(parameters, nconfigurations - nrow(allConfigurations), repair = scenario$repairConfiguration) newConfigurations <- cbind (.ID. = max(0L, allConfigurations[[".ID."]]) + seq_nrow(newConfigurations), @@ -1116,7 +1116,7 @@ irace_run <- function(scenario, parameters) catInfo("Sample ", nbNewConfigurations, " configurations from uniform distribution", verbose = FALSE) } - newConfigurations <- sampleUniform(parameters, nbNewConfigurations, + newConfigurations <- sampleSobol(parameters, nbNewConfigurations, repair = scenario$repairConfiguration) newConfigurations <- cbind (.ID. = max(0L, allConfigurations[[".ID."]]) + seq_nrow(newConfigurations), diff --git a/R/utils.R b/R/utils.R index 581de0df..8a26708f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -716,3 +716,11 @@ seq_nrow <- function(x) seq_len(nrow(x)) clamp <- function(x, lower, upper) pmax.int(lower, pmin.int(x, upper)) +truncate_rows <- function(x, n) +{ + nx <- nrow(x) + if (nx <= n) return(x) + x[-seq.int(n + 1L, nx), ] +} + + diff --git a/tests/testthat/test-targetRunnerParallel.R b/tests/testthat/test-targetRunnerParallel.R index 0565e2d2..fe28c4d7 100644 --- a/tests/testthat/test-targetRunnerParallel.R +++ b/tests/testthat/test-targetRunnerParallel.R @@ -49,7 +49,7 @@ x "x" r (1,2) ') expect_output( irace(scenario = list(targetRunnerParallel = targetRunnerParallel, - instances = lapply(1:5, function(i) 10), + instances = replicate(5, list(10)), targetRunnerData = list(a=1, b=2), maxExperiments = 42L), parameters = parameters),