feat(benchmark): Swap to bench::mark() to capture memory also

ssi-dk · Apr 18, 2024 · 67b235e · 67b235e
1 parent 6da2b35
commit 67b235e
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 10 deletions.
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -62,7 +62,7 @@ jobs:
 
       - name: Delete previous benchmark files
         if: always()
-        run: rm -rf inst/extdata/benchmark-*.rds
+        run: rm -rf inst/extdata/benchmark*.rds
 
 
 
@@ -192,7 +192,7 @@ jobs:
 
           g <- ggplot2::ggplot(
             benchmarks,
-            ggplot2::aes(x = version, y = time / 1e9)
+            ggplot2::aes(x = version, y = time)
           ) +
             ggplot2::stat_summary(fun.data = mean_sd, geom = "pointrange", size = 0.5, linewidth = 1) +
             ggplot2::facet_grid(rows = ggplot2::vars(benchmark_function), cols = ggplot2::vars(database)) +

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,14 +39,14 @@ Imports:
     tidyselect,
     utils
 Suggests:
+    bench,
     callr,
     conflicted,
     duckdb,
     here,
     jsonlite,
     knitr,
     lintr,
-    microbenchmark,
     odbc,
     rmarkdown,
     roxygen2,

diff --git a/data-raw/benchmark.R b/data-raw/benchmark.R
@@ -118,7 +118,8 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
       }
 
       # Construct the list of benchmarks
-      update_snapshot_benchmark <- microbenchmark::microbenchmark(scdb_updates(conn, data_on_conn), times = 25) |>
+      update_snapshot_benchmark <- bench::mark(scdb_updates(conn, data_on_conn), iterations = 25) |>
+        tidyr::unnest(c(time, gc)) |>
         dplyr::mutate(
           "benchmark_function" = "update_snapshot()",
           "database" = names(conns)[[1]],
@@ -145,7 +146,8 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
         }
 
         # Construct the list of benchmarks
-        update_snapshot_benchmark <- microbenchmark::microbenchmark(scdb_updates(conn, data), times = 5) |>
+        update_snapshot_benchmark <- bench::mark(scdb_updates(conn, data), iterations = 5) |>
+          tidyr::unnest(c(time, gc)) |>
           dplyr::mutate(
             "benchmark_function" = "update_snapshot() - complexity",
             "database" = names(conns)[[1]],

diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd
@@ -31,7 +31,7 @@ This data forms the basis for three "snapshots" used in the benchmarks:
 The benchmark function uses three consecutive calls to `update_snapshot()` to create the table with the first snapshot and
 then update it to the second and third snapshot. Finally, the table is deleted.
 
-The performance of this benchmark function is timed with the `microbenchmark` package using 10 replicates.
+The performance of this benchmark function is timed with the `bench` package using 25 replicates.
 All benchmarks are run on the same machine.
 
 The results of the benchmark are shown graphically below (mean and standard deviation), where we compare the current
@@ -96,7 +96,7 @@ benchmark_1 <- benchmark_1 |>
 
 g <- ggplot2::ggplot(
   benchmark_1,
-  ggplot2::aes(x = version, y = time / 1e9)
+  ggplot2::aes(x = version, y = time)
 ) +
   ggplot2::stat_summary(fun.data = mean_sd, geom = "pointrange", size = 0.5, linewidth = 1) +
   ggplot2::facet_grid(rows = ggplot2::vars(benchmark_function), cols = ggplot2::vars(database)) +
@@ -111,11 +111,11 @@ g
 
 
 We include another benchmark to highlight how the complexity of `update_snapshot()` scales with the size of the input
-data. The datasets are similar to the first benchmark is used, but the number of repeats is varied to see the impact of
+data. The data sets are similar to those used in the first benchmark, but the number of repeats is varied to see the impact of
 increasing data size. The benchmarks are run from a "clean" state, where the target_table does not exist. The benchmark
 measures both the time to create the table and to remove it again afterwards (to restore the clean state).
 
-The performance of this benchmark function is timed with the `microbenchmark` package using 5 replicates.
+The performance of this benchmark function is timed with the `bench` package using 5 replicates.
 All benchmarks are run on the same machine.
 
 The results of the benchmark are shown graphically below (mean and standard deviation), where we compare the current
@@ -128,7 +128,7 @@ benchmark_2 <- benchmarks |>
 
 ggplot2::ggplot(
   benchmark_2,
-  ggplot2::aes(x = n * nrow(iris) / 1e3, y = time / 1e9, color = version)
+  ggplot2::aes(x = n * nrow(iris) / 1e3, y = time, color = version)
 ) +
   ggplot2::stat_summary(fun.data = mean_sd, geom = "pointrange", size = 0.5, linewidth = 1) +
   ggplot2::facet_grid(rows = ggplot2::vars(benchmark_function), cols = ggplot2::vars(database)) +