diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R
index 5ed7d01b..6abd0af1 100644
--- a/data-raw/benchmark.R
+++ b/data-raw/benchmark.R
@@ -59,19 +59,20 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
     conns <- get_test_conns()
     conn <- conns[[1]]
 
-    try({
-      # Our benchmark data is the iris data set but repeated to increase the data size
-      data_generator <- function(repeats) {
-        purrr::map(
-          seq(repeats),
-          \(it) dplyr::mutate(iris, r = dplyr::row_number() + (it - 1) * nrow(iris))
-        ) |>
-          purrr::reduce(rbind) |>
-          dplyr::rename_with(~ tolower(gsub(".", "_", .x, fixed = TRUE)))
-      }
 
-      n <- 10
+    # Our benchmark data is the iris data set but repeated to increase the data size
+    data_generator <- function(repeats) {
+      purrr::map(
+        seq(repeats),
+        \(it) dplyr::mutate(iris, r = dplyr::row_number() + (it - 1) * nrow(iris))
+      ) |>
+        purrr::reduce(rbind) |>
+        dplyr::rename_with(~ tolower(gsub(".", "_", .x, fixed = TRUE)))
+    }
 
+    # Benchmark 1, update_snapshot() with consecutive updates
+    try({
+      n <- 10
       data_1 <- data_generator(n)
       data_2 <- data_generator(2 * n) |>
         dplyr::mutate(
@@ -127,11 +128,46 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
 
       dir.create("data", showWarnings = FALSE)
       saveRDS(update_snapshot_benchmark, glue::glue("data/benchmark-update_snapshot_{names(conns)[[1]]}_{version}.rds"))
     })
 
-    detach("package:SCDB", unload = TRUE)
+    # Benchmark 2, update_snapshot() with increasing data size
+    try({
+      # Define the SCDB update function. The target table is removed after each
+      # update so that repetitions do not build on each other's data
+      scdb_updates <- function(conn, data) {
+        update_snapshot(data, conn, "SCDB_benchmark", timestamp = "2021-01-01",
+                        logger = Logger$new(output_to_console = FALSE, warn = FALSE))
+        DBI::dbRemoveTable(conn, name = "SCDB_benchmark")
+      }
+
+      dir.create("data", showWarnings = FALSE)
+
+      for (n in seq(from = 2, to = 10, by = 2)) {
+        # Copy n repetitions of the iris data to the database
+        data <- data_generator(n) |>
+          dplyr::copy_to(conn, df = _, name = id("test.SCDB_data", conn), overwrite = TRUE, temporary = FALSE)
+
+        # Construct the list of benchmarks
+        update_snapshot_benchmark <- microbenchmark::microbenchmark(scdb_updates(conn, data), times = 25) |>
+          dplyr::mutate(
+            "benchmark_function" = "update_snapshot() - complexity",
+            "database" = names(conns)[[1]],
+            "version" = !!ifelse(version == "substr(sha, 1, 10)", branch, version),
+            "n" = n
+          )
+
+        # One file per data size: a shared file name would be overwritten on every iteration
+        saveRDS(
+          update_snapshot_benchmark,
+          glue::glue("data/benchmark-update_snapshot_complexity_{n}_{names(conns)[[1]]}_{version}.rds")
+        )
+      }
+    })
 
-    # Clean up
+    # Clean up outside the try() blocks: both benchmarks share `conn`, and the
+    # connections should be closed even when a benchmark fails
     purrr::walk(conns, ~ DBI::dbDisconnect(., shutdown = TRUE))
+
+    detach("package:SCDB", unload = TRUE)
   }
 }