From 195de378da46cfc6f7d0c1c2799d14ca0e90617f Mon Sep 17 00:00:00 2001 From: Arne Beer Date: Sat, 22 Feb 2025 16:16:00 +0100 Subject: [PATCH 1/3] add: state saving benchmarks --- Cargo.lock | 194 ++++++++++++++++++++++++- codecov.yml | 1 + pueue/Cargo.toml | 9 ++ pueue/benches/save_compressed_state.rs | 43 ++++++ pueue/benches/save_state.rs | 43 ++++++ 5 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 pueue/benches/save_compressed_state.rs create mode 100644 pueue/benches/save_state.rs diff --git a/Cargo.lock b/Cargo.lock index ebeadd11..5e390002 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.18" @@ -182,7 +188,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools", + "itertools 0.13.0", "proc-macro2", "quote", "regex", @@ -235,6 +241,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.14" @@ -468,6 +480,67 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + 
+[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crossterm" version = "0.28.1" @@ -847,6 +920,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + [[package]] name = "hex" version = "0.4.3" @@ -908,12 +987,32 @@ dependencies = [ "logos", ] +[[package]] +name = "is-terminal" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name 
= "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -1185,6 +1284,12 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + [[package]] name = "option-ext" version = "0.2.0" @@ -1293,6 +1398,34 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "portpicker" version = "0.1.1" @@ -1408,6 +1541,7 @@ dependencies = [ "color-eyre", "comfy-table", "command-group", + "criterion", "crossterm", "ctrlc", "flate2", @@ -1540,6 +1674,26 @@ dependencies = [ "zerocopy 0.8.20", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rcgen" version = "0.13.2" @@ -1774,6 +1928,15 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2090,6 +2253,16 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tokio" version = "1.43.0" @@ -2303,6 +2476,16 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2425,6 +2608,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/codecov.yml b/codecov.yml index 54bc006b..0eb644f5 100644 --- a/codecov.yml +++ b/codecov.yml @@ -7,6 +7,7 @@ ignore: - "LICENSE" - ".github" - ".gitignore" + - "pueue/benches" coverage: status: diff --git a/pueue/Cargo.toml b/pueue/Cargo.toml index 9083a166..f691133f 100644 --- a/pueue/Cargo.toml +++ b/pueue/Cargo.toml @@ -14,6 +14,14 @@ rust-version.workspace = true [badges] maintenance = { status = "actively-developed" } +[[bench]] +harness = false +name = "save_compressed_state" + +[[bench]] +harness = false +name = "save_state" + [dependencies] chrono.workspace = true clap = { version = "4.5.30", features = [ @@ -55,6 +63,7 @@ tracing-subscriber = { version = "0.3.19", features = [ assert_cmd = "2" assert_matches = "1" better-panic.workspace = true +criterion = "0.5" pretty_assertions.workspace = true rstest = "0.24" serde_yaml.workspace = true diff --git a/pueue/benches/save_compressed_state.rs b/pueue/benches/save_compressed_state.rs new file mode 100644 index 00000000..ee4b9b5e --- /dev/null +++ b/pueue/benches/save_compressed_state.rs @@ -0,0 +1,43 @@ +use std::{collections::HashMap, env::vars, path::PathBuf}; + +use chrono::Local; +use criterion::{Criterion, criterion_group, criterion_main}; +use pueue::daemon::internal_state::state::InternalState; +use pueue_lib::{Settings, Task, state::PUEUE_DEFAULT_GROUP}; + +/// Create a large state file with a few hundred tasks. 
+/// Save it to disk in compressed state +fn save_compressed_state() { + let dir = tempfile::tempdir().unwrap(); + let mut settings = Settings::default(); + settings.shared.pueue_directory = Some(dir.path().to_owned()); + settings.daemon.compress_state_file = true; + + let mut state = InternalState::new(); + + for _ in 0..400 { + let task = Task::new( + "ls".into(), + PathBuf::from("/tmp"), + HashMap::from_iter(vars()), + PUEUE_DEFAULT_GROUP.to_owned(), + pueue_lib::TaskStatus::Queued { + enqueued_at: Local::now(), + }, + Vec::new(), + 0, + None, + ); + + state.add_task(task); + } + + state.save(&settings).unwrap(); +} + +pub fn state(crit: &mut Criterion) { + crit.bench_function("Save compressed state", |b| b.iter(save_compressed_state)); +} + +criterion_group!(benches, state); +criterion_main!(benches); diff --git a/pueue/benches/save_state.rs b/pueue/benches/save_state.rs new file mode 100644 index 00000000..6f0a9054 --- /dev/null +++ b/pueue/benches/save_state.rs @@ -0,0 +1,43 @@ +use std::{collections::HashMap, env::vars, path::PathBuf}; + +use chrono::Local; +use criterion::{Criterion, criterion_group, criterion_main}; +use pueue::daemon::internal_state::state::InternalState; +use pueue_lib::{Settings, Task, state::PUEUE_DEFAULT_GROUP}; + +/// Create a large state file with a few hundred tasks. 
+/// Save it to disk in uncompressed state +fn save_state() { + let dir = tempfile::tempdir().unwrap(); + let mut settings = Settings::default(); + settings.shared.pueue_directory = Some(dir.path().to_owned()); + settings.daemon.compress_state_file = false; + + let mut state = InternalState::new(); + + for _ in 0..400 { + let task = Task::new( + "ls".into(), + PathBuf::from("/tmp"), + HashMap::from_iter(vars()), + PUEUE_DEFAULT_GROUP.to_owned(), + pueue_lib::TaskStatus::Queued { + enqueued_at: Local::now(), + }, + Vec::new(), + 0, + None, + ); + + state.add_task(task); + } + + state.save(&settings).unwrap(); +} + +pub fn state(crit: &mut Criterion) { + crit.bench_function("Save uncompressed state", |b| b.iter(save_state)); +} + +criterion_group!(benches, state); +criterion_main!(benches); From c95b1eec383f5cfd63c9bf04533da2bc344dbff4 Mon Sep 17 00:00:00 2001 From: Arne Beer Date: Sat, 22 Feb 2025 16:16:30 +0100 Subject: [PATCH 2/3] refactor: get max task id --- pueue/src/daemon/internal_state/state.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pueue/src/daemon/internal_state/state.rs b/pueue/src/daemon/internal_state/state.rs index ce857ba3..5e8b7ca5 100644 --- a/pueue/src/daemon/internal_state/state.rs +++ b/pueue/src/daemon/internal_state/state.rs @@ -110,9 +110,9 @@ impl InternalState { /// Add a new task pub fn add_task(&mut self, mut task: Task) -> usize { - let next_id = match self.tasks().keys().max() { + let next_id = match self.tasks().last_key_value() { None => 0, - Some(id) => id + 1, + Some((id, _)) => id + 1, }; task.id = next_id; self.tasks_mut().insert(next_id, task); From ef485a0e2b4b41584b39f4dc0d2509a9f81eb49f Mon Sep 17 00:00:00 2001 From: Arne Beer Date: Sat, 22 Feb 2025 16:54:53 +0100 Subject: [PATCH 3/3] chore: Update some docs --- CHANGELOG.md | 8 ++++++-- pueue_lib/src/log.rs | 8 +++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9b0cf96..5d5c905c 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,8 +110,6 @@ Upon updating Pueue and restarting the daemon, the previous state will be wiped, - **Breaking**: Ported from `anyhow` to `color_eyre` for prettier log output. - **Breaking**: Switch `cbor` handling library, breaking backwards-compatible communication on a data format level. - **Breaking**: Switch protocol message representation, completely breaking backwards compatibility. -- Option to save the state in compressed form. This can be toggled with the `daemon.compress_state_file` config file. - Preliminary testing shows significant compression ratios (up to x15), which helps with large states in embedded and I/O bound environments. ### Add @@ -128,6 +126,12 @@ Upon updating Pueue and restarting the daemon, the previous state will be wiped, - Add `queued_count` and `stashed_count` to callback template variables. This allows users to fire callbacks when whole groups are finished. [#578](https://github.com/Nukesor/pueue/issues/578) - Add new subcommand to set or unset environment variables for tasks. [#503](https://github.com/Nukesor/pueue/issues/503) - Add `add --follow` flag that may be called in combination with `--immediate` [#592](https://github.com/Nukesor/pueue/issues/592) +- Add option to save the state in compressed form. This can be toggled with the `daemon.compress_state_file` config option. + Preliminary testing shows significant compression ratios (up to x15), which helps with large states in embedded and I/O bound environments. + On my local machine with a state of 400 tasks, state file size **shrinks** from \~2MB to \~120KB and save time **increases** from \~8ms to \~20ms. + Due to the very repetitive nature of the state's data (mostly environment variables), the `gzip` compression algorithm with the `flate2` implementation has been chosen. + It shows similar compression ratios to `zstd` on level `7`, which is more than enough and the dependency is significantly lighter than `zstd`.
+ `snappy`, which is already a dependency, has also been considered, but it has much worse compression ratios (\~2MB -> \~300KB). ### Fixed diff --git a/pueue_lib/src/log.rs b/pueue_lib/src/log.rs index 93850e35..4a050bd5 100644 --- a/pueue_lib/src/log.rs +++ b/pueue_lib/src/log.rs @@ -63,7 +63,7 @@ pub fn clean_log_handles(task_id: usize, pueue_dir: &Path) { /// Return type is `(Vec<u8>, bool)` /// - `Vec<u8>` the compressed task output. /// - `bool` Whether the full task's output has been read. `false` indicate that the log output has -/// been truncated +/// been truncated. pub fn read_and_compress_log_file( task_id: usize, pueue_dir: &Path, @@ -74,14 +74,16 @@ pub fn read_and_compress_log_file( let mut content = Vec::new(); // Indicates whether the full log output is shown or just the last part of it. + // This may be true even if, for example, only the last 15 lines were requested + // but the log is only 10 lines long. let mut output_complete = true; - // Move the cursor to the last few lines of both files. + // If requested, move the cursor to the last few lines of the file. if let Some(lines) = lines { output_complete = seek_to_last_lines(&mut file, lines)?; } - // Compress the full log input and pipe it into the snappy compressor + // Pipe the remaining log file content into the snappy compressor { let mut compressor = FrameEncoder::new(&mut content); io::copy(&mut file, &mut compressor)