Skip to content

Commit

Permalink
test the new AEF-DDF workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
RasmusSkytte committed Oct 21, 2024
1 parent 422cf9f commit bb9e9df
Showing 1 changed file with 8 additions and 233 deletions.
241 changes: 8 additions & 233 deletions .github/workflows/benchmark.yaml
Original file line number Diff line number Diff line change
@@ -1,237 +1,12 @@
on:
workflow_dispatch:
workflow_call:
inputs:
backend_exclude:
type: string
default: 'mssql'
- workflow_dispatch
- push

name: Benchmark

name: "⏱️ Benchmark"
jobs:
benchmark:
runs-on: ubuntu-latest

concurrency:
group: benchmark-${{ github.ref }}
cancel-in-progress: true

services:
postgres:
image: postgres:latest
env:
POSTGRES_DB: test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
options: --health-cmd "pg_isready -U postgres" --health-interval 10s --health-timeout 5s --health-retries 5

env:
PGHOST: localhost
PGPORT: 5432
PGDATABASE: test
PGUSER: postgres
PGPASSWORD: postgres

steps:
- name: Install a SQL Server suite of tools
if: ${{ !contains(inputs.backend_exclude, 'mssql') }}
uses: potatoqualitee/[email protected]
with:
install: sqlengine, sqlpackage, sqlclient
show-log: true

- name: Configure SQL server
if: ${{ !contains(inputs.backend_exclude, 'mssql') }}
run: |
set -o xtrace
sqlcmd -V 10 -S localhost -U SA -P dbatools.I0 -Q "ALTER LOGIN SA WITH DEFAULT_DATABASE = master;"
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false

- name: Configure git
run: |
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "[email protected]"
git switch ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::devtools

- name: Delete previous benchmark files
if: always()
run: rm -rf inst/extdata/benchmark-*.rds

- name: Get SQLite version
if: ${{ !contains(inputs.backend_exclude, 'sqlite') }}
run: |
version=$(Rscript -e "cat(DBI::dbGetQuery(DBI::dbConnect(RSQLite::SQLite()), 'SELECT sqlite_version();')[[1]])")
echo "SQLITE_VERSION=SQLite v$version" >> $GITHUB_ENV
- name: Get DuckDB version
if: ${{ !contains(inputs.backend_exclude, 'duckdb') }}
run: |
version=$(Rscript -e "conn <- DBI::dbConnect(duckdb::duckdb()); cat(DBI::dbGetQuery(conn, 'SELECT version();')[[1]]); DBI::dbDisconnect(conn, shutdown = TRUE)")
echo "DUCKDB_VERSION=DuckDB $version" >> $GITHUB_ENV
- name: Get PostgreSQL version
if: ${{ !contains(inputs.backend_exclude, 'postgres') }}
run: |
version=$(psql --version | awk '{print $3}')
echo "POSTGRES_VERSION=PostgreSQL v$version" >> $GITHUB_ENV
- name: Get SQL Server version
if: ${{ !contains(inputs.backend_exclude, 'mssql') }}
run: |
version=$(sqlcmd -S localhost -U SA -P dbatools.I0 -Q "SET NOCOUNT ON; SELECT SERVERPROPERTY('productversion') AS version" -h -1 -W -b)
echo "SQL_SERVER_VERSION=SQL Server v$version" >> $GITHUB_ENV
- name: Install libraries to benchmark
if: always()
run: source("./data-raw/benchmark.R", echo=TRUE)
shell: Rscript {0}

- name: Run benchmark (${{ env.SQLITE_VERSION }})
if: ${{ always() && !contains(inputs.backend_exclude, 'sqlite') }}
env:
BACKEND: ${{ env.SQLITE_VERSION }}
BACKEND_DRV: RSQLite::SQLite
BACKEND_ARGS: 'list(dbname = file.path(tempdir(), "SQLite.SQLite"))'
run: source("./data-raw/benchmark.R", echo=TRUE)
shell: Rscript {0}

- name: Run benchmark (${{ env.DUCKDB_VERSION }})
if: ${{ always() && !contains(inputs.backend_exclude, 'duckdb') }}
env:
BACKEND: ${{ env.DUCKDB_VERSION }}
BACKEND_DRV: duckdb::duckdb
BACKEND_ARGS: 'list(dbdir = file.path(tempdir(), "DuckDB.duckdb"))'
run: source("./data-raw/benchmark.R", echo=TRUE)
shell: Rscript {0}

- name: Run benchmark (${{ env.POSTGRES_VERSION }})
if: ${{ always() && !contains(inputs.backend_exclude, 'postgres') }}
env:
BACKEND: ${{ env.POSTGRES_VERSION }}
BACKEND_DRV: RPostgres::Postgres
run: source("./data-raw/benchmark.R", echo=TRUE)
shell: Rscript {0}

- name: Run benchmark (${{ env.SQL_SERVER_VERSION }})
if: ${{ always() && !contains(inputs.backend_exclude, 'mssql') }}
env:
BACKEND: ${{ env.SQL_SERVER_VERSION }}
BACKEND_DRV: odbc::odbc
CONN_ARGS_JSON: >
{
"${{ env.SQL_SERVER_VERSION }}": {
"driver": "ODBC Driver 17 for SQL Server",
"server": "localhost",
"database": "master",
"UID": "SA",
"PWD": "dbatools.I0"
}
}
run: source("./data-raw/benchmark.R", echo=TRUE)
shell: Rscript {0}

- name: Display structure of benchmark files
if: always()
run: ls -R inst/extdata

- name: Combine benchmark results
if: always()
run: |
benchmark_files <- list.files(
"inst/extdata",
pattern = "^benchmark-",
full.names = TRUE,
recursive = TRUE
)
benchmarks <- benchmark_files |>
purrr::map(readRDS) |>
purrr::map(tibble::as_tibble) |>
purrr::reduce(rbind)
benchmarks <- benchmarks |>
dplyr::mutate(
"version" = factor(
.data$version,
levels = c("CRAN", "main", setdiff(unique(benchmarks$version), c("CRAN", "main")))
)
)
# Save the combined benchmark results and delete the individual files
dir.create(file.path("inst", "extdata"), recursive = TRUE, showWarnings = FALSE)
saveRDS(benchmarks, file.path("inst", "extdata", "benchmarks.rds"))
file.remove(benchmark_files)
# Add note slow backends
slow_backends <- benchmarks |>
dplyr::distinct(.data$database, .data$n) |>
dplyr::filter(.data$n < max(.data$n)) |>
dplyr::pull("database")
benchmarks <- benchmarks |>
dplyr::mutate("database" = paste0(database, ifelse(database %in% slow_backends, "*", "")))
# Mean and standard deviation (see ggplot2::mean_se())
mean_sd <- function(x) {
mu <- mean(x)
sd <- sd(x)
data.frame(y = mu, ymin = mu - sd, ymax = mu + sd)
}
# Determine subgroups
groups <- setdiff(colnames(benchmarks), c("expr", "time", "benchmark_function", "database", "version", "n"))
# Allow subgroups
dodge <- ggplot2::position_dodge(width = 0.6)
g <- ggplot2::ggplot(
benchmarks,
ggplot2::aes(
x = version,
y = time / 1e9,
group = !!switch(length(groups) > 0, as.symbol(groups)),
color = !!switch(length(groups) > 0, as.symbol(groups)))
) +
ggplot2::stat_summary(fun.data = mean_sd, geom = "pointrange", size = 0.5, linewidth = 1, position = dodge) +
ggplot2::facet_grid(rows = ggplot2::vars(benchmark_function), cols = ggplot2::vars(database)) +
ggplot2::labs(x = "Codebase version", y = "Time (s)")
if (length(slow_backends) > 1) {
g <- g + ggplot2::labs(caption = "* IMPORTANT: Benchmark data halved for this backend!")
}
ggplot2::ggsave("benchmarks.pdf")
shell: Rscript {0}

- name: Upload benchmark summary
if: always()
uses: actions/upload-artifact@v4
with:
name: benchmark-summary
path: benchmarks.pdf

- name: Commit and push changes
if: always()
run: |
git remote set-url origin https://$GITHUB_ACTOR:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY.git
git stash --include-untracked
git pull --rebase origin ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
git stash list | grep stash@{0} && git stash pop || echo "No stash to pop"
git add inst/extdata/\*
git commit -m "chore: Update benchmark data" || echo "No changes to commit"
git push origin ${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
document:
uses: ssi-dk/AEF-DDF/.github/workflows/benchmark.yaml@main
secrets: inherit
with:
backend_exclude: ""

0 comments on commit bb9e9df

Please sign in to comment.