Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/36/add benchmarks #70

Merged
merged 13 commits into from
Jun 18, 2024
95 changes: 95 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ cid = { version = "0.11.1" }
clap = { version = "4.5.3" }
codec = { package = "parity-scale-codec", version = "3.0.0", default-features = false }
color-print = "0.3.4"
criterion = "0.5.1"
digest = "0.10.7"
futures = "0.3.28"
hex-literal = { version = "0.4.1" }
Expand Down
7 changes: 6 additions & 1 deletion storage/mater/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,18 @@ serde = { workspace = true, features = ["derive"] }
serde_ipld_dagcbor.workspace = true
sha2.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["fs", "macros", "rt"] }
tokio = { workspace = true, features = ["fs", "macros", "rt-multi-thread"] }
tokio-stream.workspace = true
tokio-util = { workspace = true, features = ["io"] }

[dev-dependencies]
criterion = { workspace = true, features = ["async_tokio", "html_reports"] }
rand.workspace = true
tempfile.workspace = true

[lints]
workspace = true

[[bench]]
harness = false
name = "benchmark"
152 changes: 152 additions & 0 deletions storage/mater/benches/benchmark.rs
jmg-duarte marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
use std::{
fmt::Display,
io::Cursor,
path::{Path, PathBuf},
sync::OnceLock,
};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use mater::{create_filestore, Blockstore, Config};
use tempfile::{tempdir, TempDir};
use tokio::runtime::Runtime as TokioExecutor;

/// Parameters that identify a single benchmark input.
#[derive(Debug, Clone, Copy)]
struct Params {
    /// Requested content size, in bytes.
    size: usize,
    /// Number of duplicates requested for the content.
    num: usize,
}

/// Renders the parameters as the human-readable label used in benchmark ids.
impl Display for Params {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { size, num } = self;
        f.write_fmt(format_args!(
            "content_size: {size} bytes, num_of_duplicates: {num}"
        ))
    }
}

/// Content sizes (in bytes) exercised by the benchmarks.
///
/// Each entry is a multiple of 1024 bytes: 1,024,000 (~1 MB), 10,240,000
/// (~10 MB), 102,400,000 (~100 MB) and 1,024,000,000 (~1 GB).
fn get_sizes() -> Vec<usize> {
    const KIB: usize = 1024;

    [1_000, 10_000, 100_000, 1_000_000]
        .into_iter()
        .map(|kibibytes| KIB * kibibytes)
        .collect()
}

/// Duplicate counts exercised by the benchmarks.
///
/// A count of zero means the content has no copies, i.e. it is entirely
/// unique.
fn get_num_copies() -> Vec<usize> {
    const COPIES: [usize; 4] = [0, 1, 2, 4];
    COPIES.to_vec()
}

/// Lazily-initialised cache of every benchmark input, shared by all groups.
static CONTENTS: OnceLock<Vec<(Params, Vec<u8>)>> = OnceLock::new();

/// Returns all benchmark inputs, generating them on first use.
///
/// One entry is produced for every (size, number of copies) combination, with
/// sizes in the outer position and copy counts in the inner one. The data is
/// generated once and kept for the lifetime of the process.
fn get_contents() -> &'static Vec<(Params, Vec<u8>)> {
    CONTENTS.get_or_init(|| {
        get_sizes()
            .into_iter()
            .flat_map(|size| {
                get_num_copies()
                    .into_iter()
                    .map(move |num| (Params { size, num }, create_content(size, num)))
            })
            .collect()
    })
}

/// Create random content of a given size. Duplicates are used to specify how
/// many times the content should be repeated.
fn create_content(size: usize, num_of_copies: usize) -> Vec<u8> {
let single_part_size = size / (num_of_copies + 1);
jmg-duarte marked this conversation as resolved.
Show resolved Hide resolved
let single_content = (0..single_part_size)
.map(|_| rand::random())
.collect::<Vec<u8>>();

single_content.repeat(num_of_copies)
}

/// Writes `content` to a file named `source_file` inside a fresh temporary
/// directory.
///
/// Returns the directory handle together with the path of the written file.
/// Panics if the directory cannot be created or the file cannot be written.
fn prepare_source_file(content: &[u8]) -> (TempDir, PathBuf) {
    let temp_dir = tempdir().unwrap();
    let source_path = temp_dir.path().join("source_file");

    std::fs::write(&source_path, content).unwrap();

    (temp_dir, source_path)
}

/// Benchmarked operation: feed `content` into `store` through an in-memory
/// cursor. Panics if the read fails.
async fn read_content_benched(content: &[u8], mut store: Blockstore) {
    store.read(Cursor::new(content)).await.unwrap()
}

/// Registers one "read" benchmark per prepared input. Every iteration reads
/// the input into a brand-new `Blockstore`.
fn read(c: &mut Criterion) {
    for (params, content) in get_contents() {
        let id = BenchmarkId::new("read", params);

        c.bench_with_input(id, params, |bencher, _params| {
            let executor = TokioExecutor::new().unwrap();
            bencher
                .to_async(executor)
                .iter(|| read_content_benched(content, Blockstore::new()));
        });
    }
}

/// Benchmarked operation: write the contents of `store` into `buffer`.
/// Panics if the write fails.
async fn write_contents_benched(buffer: Vec<u8>, store: Blockstore) {
    let outcome = store.write(buffer).await;
    outcome.unwrap();
}

/// Registers one "write" benchmark per prepared input.
///
/// The blockstore is filled once per input, outside the measured section.
/// Each measured iteration then receives its own clone of that store plus an
/// empty buffer pre-allocated with `params.size` bytes of capacity.
fn write(c: &mut Criterion) {
    // Runtime used only for the unmeasured setup step below.
    let setup_runtime = TokioExecutor::new().unwrap();

    for (params, content) in get_contents() {
        let mut loaded_store = Blockstore::new();

        // Load the input into the blockstore before benchmarking starts.
        setup_runtime.block_on(async {
            loaded_store.read(Cursor::new(content)).await.unwrap()
        });

        let id = BenchmarkId::new("write", params);
        c.bench_with_input(id, &(), |bencher, _: &()| {
            let executor = TokioExecutor::new().unwrap();
            bencher.to_async(executor).iter_batched(
                || (loaded_store.clone(), Vec::with_capacity(params.size)),
                |(store, sink)| write_contents_benched(sink, store),
                BatchSize::SmallInput,
            );
        });
    }
}

/// Benchmarked operation: build a filestore at `target` from the file at
/// `source`, using the default configuration. Panics on failure.
async fn create_filestore_benched(source: &Path, target: &Path) {
    let config = Config::default();
    create_filestore(source, target, config).await.unwrap();
}

/// Registers one "filestore" benchmark per prepared input.
///
/// The input is written to a temporary source file up front; every measured
/// iteration creates the filestore at the same `target` path inside that
/// temporary directory.
fn filestore(c: &mut Criterion) {
    for (params, content) in get_contents() {
        // `temp_dir` must stay bound for the whole iteration so the directory
        // handle is not dropped while the benchmark is running.
        let (temp_dir, source_file) = prepare_source_file(content);
        let target_file = temp_dir.path().join("target");

        let id = BenchmarkId::new("filestore", params);
        c.bench_with_input(id, &(), |bencher, _: &()| {
            let executor = TokioExecutor::new().unwrap();
            bencher
                .to_async(executor)
                .iter(|| create_filestore_benched(&source_file, &target_file));
        });
    }
}

// Each benchmark function gets its own Criterion group.
criterion_group!(bench_reading, read);
criterion_group!(bench_writing, write);
criterion_group!(bench_filestore, filestore);
// Entry point of the benchmark binary; the groups run in the order listed.
criterion_main!(bench_reading, bench_writing, bench_filestore);
7 changes: 5 additions & 2 deletions storage/mater/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! Both version 1 and version 2 are supported.
//!
//! You can make use of the lower-level utilities such as [`CarV2Reader`] to read a CARv2 file,
//! though these utilies were designed to be used in higher-level abstractions, like the [`Blockstore`].
//! though these utilities were designed to be used in higher-level abstractions, like the [`Blockstore`].

#![warn(unused_crate_dependencies)]
#![warn(missing_docs)]
Expand All @@ -18,7 +18,7 @@ mod v2;

// We need to expose this because `read_block` returns `(Cid, Vec<u8>)`.
pub use ipld_core::cid::Cid;
pub use stores::{create_filestore, Blockstore};
pub use stores::{create_filestore, Blockstore, Config};
pub use v1::{Header as CarV1Header, Reader as CarV1Reader, Writer as CarV1Writer};
pub use v2::{
Characteristics, Header as CarV2Header, Index, IndexEntry, IndexSorted, MultihashIndexSorted,
Expand Down Expand Up @@ -131,6 +131,9 @@ pub(crate) mod test_utils {
use std::path::Path;

pub(crate) use assert_buffer_eq;
/// Referenced here so that the build does not fail: the compiler considers
/// `criterion` unused, even though it is used by the benchmarks.
use criterion as _;
jmg-duarte marked this conversation as resolved.
Show resolved Hide resolved
use tokio::{fs::File, io::AsyncWriteExt};

/// Dump a byte slice into a file.
Expand Down
1 change: 1 addition & 0 deletions storage/mater/src/stores/blockstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use crate::{
/// The store keeps track of ([`Cid`], [`Bytes`]) pairs, performing de-duplication based on the [`Cid`].
///
/// **Important note: currently, the blockstore only supports a single file!**
#[derive(Debug, Clone)]
pub struct Blockstore {
root: Option<Cid>,
blocks: IndexMap<Cid, Bytes>,
Expand Down
Loading