eigerco · serg-temchenko · Jun 18, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 11, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -37,6 +37,7 @@ cid = { version = "0.11.1" }
 clap = { version = "4.5.3" }
 codec = { package = "parity-scale-codec", version = "3.0.0", default-features = false }
 color-print = "0.3.4"
+criterion = "0.5.1"
 digest = "0.10.7"
 futures = "0.3.28"
 hex-literal = { version = "0.4.1" }

diff --git a/storage/mater/Cargo.toml b/storage/mater/Cargo.toml
@@ -23,13 +23,18 @@ serde = { workspace = true, features = ["derive"] }
 serde_ipld_dagcbor.workspace = true
 sha2.workspace = true
 thiserror.workspace = true
-tokio = { workspace = true, features = ["fs", "macros", "rt"] }
+tokio = { workspace = true, features = ["fs", "macros", "rt-multi-thread"] }
 tokio-stream.workspace = true
 tokio-util = { workspace = true, features = ["io"] }
 
 [dev-dependencies]
+criterion = { workspace = true, features = ["async_tokio", "html_reports"] }
 rand.workspace = true
 tempfile.workspace = true
 
 [lints]
 workspace = true
+
+[[bench]]
+harness = false
+name = "benchmark"
diff --git a/storage/mater/benches/benchmark.rs b/storage/mater/benches/benchmark.rs
@@ -0,0 +1,152 @@
+use std::{
+    fmt::Display,
+    io::Cursor,
+    path::{Path, PathBuf},
+    sync::OnceLock,
+};
+
+use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
+use mater::{create_filestore, Blockstore, Config};
+use tempfile::{tempdir, TempDir};
+use tokio::runtime::Runtime as TokioExecutor;
+
+/// Parameters identifying a single benchmark input.
+#[derive(Debug, Clone, Copy)]
+struct Params {
+    /// Requested content size in bytes (passed as `size` to `create_content`).
+    size: usize,
+    /// Number of duplicates requested (passed as `num_of_copies` to `create_content`).
+    num: usize,
+}
+
+/// Renders the parameters as the human-readable label used in benchmark IDs.
+impl Display for Params {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self { size, num } = *self;
+        f.write_fmt(format_args!(
+            "content_size: {} bytes, num_of_duplicates: {}",
+            size, num
+        ))
+    }
+}
+
+/// Content sizes, in bytes, exercised by the benchmarks.
+///
+/// Every size is a multiple of 1024 bytes, so the "MB"/"GB" labels below are
+/// approximate (the first entry, for example, is 1,024,000 bytes).
+fn get_sizes() -> Vec<usize> {
+    const KIB: usize = 1024;
+
+    [
+        1_000,     // ~1 MB
+        10_000,    // ~10 MB
+        100_000,   // ~100 MB
+        1_000_000, // ~1 GB
+    ]
+    .into_iter()
+    .map(|kib| KIB * kib)
+    .collect()
+}
+
+/// Duplicate counts exercised by the benchmarks.
+///
+/// A count of zero requests no duplicates, i.e. fully unique content.
+fn get_num_copies() -> Vec<usize> {
+    const COPIES: [usize; 4] = [0, 1, 2, 4];
+    COPIES.to_vec()
+}
+
+/// Lazily-built cache of benchmark inputs, shared by all benchmark functions.
+static CONTENTS: OnceLock<Vec<(Params, Vec<u8>)>> = OnceLock::new();
+
+/// Returns every (parameters, content) pair, generating them on the first call.
+///
+/// Pairs are ordered by size first and by number of copies second.
+fn get_contents() -> &'static Vec<(Params, Vec<u8>)> {
+    CONTENTS.get_or_init(|| {
+        get_sizes()
+            .into_iter()
+            .flat_map(|size| {
+                get_num_copies()
+                    .into_iter()
+                    .map(move |num| (Params { size, num }, create_content(size, num)))
+            })
+            .collect()
+    })
+}
+
+/// Create content of roughly `size` bytes, made of one random part followed by
+/// `num_of_copies` duplicates of that part.
+///
+/// The part length is `size / (num_of_copies + 1)` (integer division), so the
+/// result can be up to `num_of_copies` bytes shorter than `size`. With zero
+/// copies the whole content is unique.
+fn create_content(size: usize, num_of_copies: usize) -> Vec<u8> {
+    let total_parts = num_of_copies + 1;
+    let single_part_size = size / total_parts;
+    let single_content = (0..single_part_size)
+        .map(|_| rand::random())
+        .collect::<Vec<u8>>();
+
+    // Repeat `total_parts` times: the original part plus its copies. Repeating
+    // only `num_of_copies` times would yield empty content for zero copies.
+    single_content.repeat(total_parts)
+}
+
+/// Writes `content` to a file named `source_file` inside a fresh temporary
+/// directory.
+///
+/// Returns the directory handle together with the file path, so the caller
+/// decides how long the directory handle is kept.
+fn prepare_source_file(content: &[u8]) -> (TempDir, PathBuf) {
+    let dir = tempdir().unwrap();
+    let source_path = dir.path().join("source_file");
+    std::fs::write(&source_path, content).unwrap();
+
+    (dir, source_path)
+}
+
+/// Benchmarked routine: feeds `content` into `store` through an in-memory
+/// cursor, panicking if the read fails.
+async fn read_content_benched(content: &[u8], mut store: Blockstore) {
+    store.read(Cursor::new(content)).await.unwrap()
+}
+
+/// Registers one "read" benchmark per prepared content.
+fn read(c: &mut Criterion) {
+    for (params, content) in get_contents() {
+        let id = BenchmarkId::new("read", params);
+        c.bench_with_input(id, params, |bencher, _params| {
+            let executor = TokioExecutor::new().unwrap();
+            // Every iteration starts from a newly created blockstore.
+            bencher
+                .to_async(executor)
+                .iter(|| read_content_benched(content, Blockstore::new()));
+        });
+    }
+}
+
+/// Benchmarked routine: writes the contents of `store` out to `buffer`,
+/// panicking if the write fails.
+async fn write_contents_benched(buffer: Vec<u8>, store: Blockstore) {
+    let outcome = store.write(buffer).await;
+    outcome.unwrap();
+}
+
+/// Registers one "write" benchmark per prepared content.
+///
+/// The blockstore is populated once, before the benchmark is registered; the
+/// batch setup then hands every run its own clone and a pre-allocated buffer.
+fn write(c: &mut Criterion) {
+    let setup_runtime = TokioExecutor::new().unwrap();
+
+    for (params, content) in get_contents() {
+        // Load the content into a blockstore up front.
+        let mut populated = Blockstore::new();
+        setup_runtime.block_on(async { populated.read(Cursor::new(content)).await.unwrap() });
+
+        let id = BenchmarkId::new("write", params);
+        c.bench_with_input(id, &(), |bencher, _: &()| {
+            let make_input = || (populated.clone(), Vec::<u8>::with_capacity(params.size));
+            bencher.to_async(TokioExecutor::new().unwrap()).iter_batched(
+                make_input,
+                |(store, buffer)| write_contents_benched(buffer, store),
+                BatchSize::SmallInput,
+            );
+        });
+    }
+}
+
+/// Benchmarked routine: calls `create_filestore` for `source` and `target`
+/// with the default configuration, panicking on failure.
+async fn create_filestore_benched(source: &Path, target: &Path) {
+    let config = Config::default();
+    create_filestore(source, target, config).await.unwrap();
+}
+
+/// Registers one "filestore" benchmark per prepared content.
+fn filestore(c: &mut Criterion) {
+    for (params, content) in get_contents() {
+        // Source and target live in the same temporary directory; `workdir`
+        // stays bound until the end of this loop iteration.
+        let (workdir, source_path) = prepare_source_file(content);
+        let target_path = workdir.path().join("target");
+
+        let id = BenchmarkId::new("filestore", params);
+        c.bench_with_input(id, &(), |bencher, _: &()| {
+            bencher
+                .to_async(TokioExecutor::new().unwrap())
+                .iter(|| create_filestore_benched(&source_path, &target_path));
+        });
+    }
+}
+
+// Each benchmark function is registered in its own group; all three groups are
+// handed to `criterion_main!`, which replaces the default harness (the bench
+// target is declared with `harness = false`).
+criterion_group!(bench_reading, read);
+criterion_group!(bench_writing, write);
+criterion_group!(bench_filestore, filestore);
+criterion_main!(bench_reading, bench_writing, bench_filestore);
diff --git a/storage/mater/src/lib.rs b/storage/mater/src/lib.rs
@@ -2,7 +2,7 @@
 //! Both version 1 and version 2 are supported.
 //!
 //! You can make use of the lower-level utilities such as [`CarV2Reader`] to read a CARv2 file,
-//! though these utilies were designed to be used in higher-level abstractions, like the [`Blockstore`].
+//! though these utilities were designed to be used in higher-level abstractions, like the [`Blockstore`].
 
 #![warn(unused_crate_dependencies)]
 #![warn(missing_docs)]
@@ -18,7 +18,7 @@ mod v2;
 
 // We need to expose this because `read_block` returns `(Cid, Vec<u8>)`.
 pub use ipld_core::cid::Cid;
-pub use stores::{create_filestore, Blockstore};
+pub use stores::{create_filestore, Blockstore, Config};
 pub use v1::{Header as CarV1Header, Reader as CarV1Reader, Writer as CarV1Writer};
 pub use v2::{
     Characteristics, Header as CarV2Header, Index, IndexEntry, IndexSorted, MultihashIndexSorted,
@@ -131,6 +131,9 @@ pub(crate) mod test_utils {
     use std::path::Path;
 
     pub(crate) use assert_buffer_eq;
+    /// Prevents the build from failing on the `unused_crate_dependencies` lint:
+    /// `criterion` looks unused from this crate, but it is used by the benchmarks.
+    use criterion as _;
     use tokio::{fs::File, io::AsyncWriteExt};
 
     /// Dump a byte slice into a file.

diff --git a/storage/mater/src/stores/blockstore.rs b/storage/mater/src/stores/blockstore.rs
@@ -30,6 +30,7 @@ use crate::{
 /// The store keeps track of ([`Cid`], [`Bytes`]) pairs, performing de-duplication based on the [`Cid`].
 ///
 /// **Important note: currently, the blockstore only supports a single file!**
+#[derive(Debug, Clone)]
 pub struct Blockstore {
     root: Option<Cid>,
     blocks: IndexMap<Cid, Bytes>,