From b0cc9563276af8af521bcec49ea96846dc562aa3 Mon Sep 17 00:00:00 2001 From: Chris O'Brien Date: Tue, 1 Aug 2023 14:19:25 -0500 Subject: [PATCH] add lr_trie, setup workspace --- Cargo.toml | 55 +++++++ lr_trie/Cargo.toml | 24 +++ lr_trie/src/inner.rs | 48 ++++++ lr_trie/src/inner_wrapper.rs | 156 ++++++++++++++++++++ lr_trie/src/lib.rs | 11 ++ lr_trie/src/op.rs | 17 +++ lr_trie/src/result.rs | 22 +++ lr_trie/src/trie.rs | 274 +++++++++++++++++++++++++++++++++++ 8 files changed, 607 insertions(+) create mode 100644 Cargo.toml create mode 100644 lr_trie/Cargo.toml create mode 100644 lr_trie/src/inner.rs create mode 100644 lr_trie/src/inner_wrapper.rs create mode 100644 lr_trie/src/lib.rs create mode 100644 lr_trie/src/op.rs create mode 100644 lr_trie/src/result.rs create mode 100644 lr_trie/src/trie.rs diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..584bf40 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,55 @@ +[package] +name = "integral-db" +description = "High throughput persistence layer optimized for concurrent reads and writes." +version = "0.1.0" +authors = ["VRRB Labs "] +edition = "2021" +readme = "README.md" + +[lib] +name = "integral_db" +path = "lr_trie/src/lib.rs" + +[workspace] +members = [ + "lr_trie", +] + +[dependencies] +left-right = "0.11.5" +patriecia = { git = "https://github.com/vrrb-io/patriecia" } +serde = { version = "1.0.144", features = ["derive"] } +keccak-hash = "0.9" +parking_lot = "0.12" +rlp = "0.5.1" +thiserror = "1.0" +bincode = "1.3.3" +rand = { version = "0.8.5", features = ["std"] } +hex = "*" +criterion = "0.3.5" +ethereum-types = "0.13.1" +uuid = { version = "1.3.1", features = ["v4", "serde"] } +tracing = "0.1.37" + +[workspace.dependencies] +left-right = "0.11.5" +patriecia = { git = "https://github.com/vrrb-io/patriecia" } +serde = { version = "1.0.144", features = ["derive"] } +keccak-hash = "0.9" +parking_lot = "0.12" +rlp = "0.5.1" +thiserror = "1.0" +bincode = "1.3.3" +rand = { version = "0.8.5", features = ["std"] } +hex = "*" +criterion = "0.3.5" +ethereum-types = "0.13.1" +uuid = { version = "1.3.1", features = ["v4", "serde"] } +tracing = "0.1.37" + +[dev-dependencies] +rand = { workspace = true } +hex = { workspace = true } +criterion = { workspace = true } +ethereum-types = { workspace = true } +uuid = { workspace = true } diff --git a/lr_trie/Cargo.toml b/lr_trie/Cargo.toml new file mode 100644 index 0000000..7885e2a --- /dev/null +++ b/lr_trie/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "lr_trie" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +left-right = { workspace = true } +patriecia = { workspace = true } +serde = { workspace = true } +keccak-hash = { workspace = true } +parking_lot = { workspace = true } +rlp = { workspace = true } +thiserror = { workspace = true } +bincode = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +rand = { workspace = true } +hex = { workspace = true } +criterion = { workspace = true } +ethereum-types = { workspace = true } +uuid = { workspace = true } diff --git a/lr_trie/src/inner.rs b/lr_trie/src/inner.rs new file mode 100644 index 0000000..763d75f --- /dev/null +++ b/lr_trie/src/inner.rs @@ -0,0 +1,48 @@ +use left_right::Absorb; +pub use left_right::ReadHandleFactory; +use patriecia::{db::Database, inner::InnerTrie, trie::Trie}; +use tracing::error; + +use crate::Operation; + +impl Absorb for InnerTrie +where + D: Database, +{ + fn absorb_first(&mut self, operation: &mut Operation, _other: &Self) { + match operation { + // TODO: report errors via instrumentation + Operation::Add(key, value) => { + if let Err(err) = self.insert(key, value) { + error!("failed to insert key: {err}"); + } + } + Operation::Remove(key) => { + if let Err(err) = self.remove(key) { + error!("failed to remove value for key: {err}"); + } + } + Operation::Extend(values) => { + // + // TODO: temp hack to get this going. Refactor ASAP + // + for (k, v) in values { + if let Err(err) = self.insert(k, v) { + error!("failed to insert key: {err}"); + } + } + } + } + + if let Err(err) = self.commit() { + error!("failed to commit changes to trie: {err}"); + } + } + + fn sync_with(&mut self, first: &Self) { + *self = first.clone(); + if let Err(err) = self.commit() { + tracing::error!("failed to commit changes to trie: {err}"); + } + } +} diff --git a/lr_trie/src/inner_wrapper.rs b/lr_trie/src/inner_wrapper.rs new file mode 100644 index 0000000..4b3f811 --- /dev/null +++ b/lr_trie/src/inner_wrapper.rs @@ -0,0 +1,156 @@ +use std::{ + fmt::{self, Debug, Display, Formatter}, + sync::Arc, +}; + +pub use left_right::ReadHandleFactory; +use patriecia::{db::Database, trie::Trie, InnerTrie, TrieIterator, H256}; +use serde::{Deserialize, Serialize}; + +use crate::{LeftRightTrieError, Result}; + +pub type Proof = Vec; + +#[derive(Debug, Clone)] +pub struct InnerTrieWrapper +where + D: Database, +{ + inner: InnerTrie, +} + +impl InnerTrieWrapper +where + D: Database, +{ + pub fn new(inner: InnerTrie) -> Self { + Self { inner } + } + + /// Produces a clone of the underlying trie + pub fn inner(&self) -> InnerTrie { + self.inner.clone() + } + + pub fn get(&self, key: &K) -> Result + where + K: for<'a> Deserialize<'a> + Serialize + Clone, + V: for<'a> Deserialize<'a> + Serialize + Clone, + { + let key = bincode::serialize(key).unwrap_or_default(); + + let raw_value_opt = self + .inner + .get(&key) + .map_err(|err| LeftRightTrieError::Other(err.to_string()))?; + + let raw_value = raw_value_opt.ok_or_else(|| { + LeftRightTrieError::Other("received none value from inner trie".to_string()) + })?; + + let value = bincode::deserialize::(&raw_value) + .map_err(|err| LeftRightTrieError::Other(err.to_string()))?; + + Ok(value) + } + + pub fn contains<'a, K, V>(&self, key: &'a K) -> Result + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + let key = bincode::serialize(&key).unwrap_or_default(); + self.inner + .contains(&key) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn insert<'a, K, V>(&mut self, key: K, value: V) -> Result<()> + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + let key = bincode::serialize(&key).unwrap_or_default(); + let value = bincode::serialize(&value).unwrap_or_default(); + + self.inner + .insert(&key, &value) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn remove<'a, K, V>(&mut self, key: K) -> Result + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + let key = bincode::serialize(&key).unwrap_or_default(); + self.inner + .remove(&key) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn root_hash(&mut self) -> Result { + self.commit() + } + + /// Creates a Merkle proof for a given value. + pub fn get_proof<'a, K, V>(&mut self, key: &K) -> Result> + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + let key = bincode::serialize(key).unwrap_or_default(); + self.inner + .get_proof(&key) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + /// Verifies a Merkle proof for a given value. + pub fn verify_proof<'a, K, V>( + &self, + root_hash: H256, + key: &K, + proof: Vec, + ) -> Result> + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + let key = bincode::serialize(key).unwrap_or_default(); + + self.inner + .verify_proof(root_hash, &key, proof) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn commit(&mut self) -> Result { + self.inner + .commit() + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn iter(&self) -> TrieIterator { + self.inner.iter() + } + + pub fn len(&self) -> usize { + self.iter().count() + } + + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub fn db(&self) -> Arc { + self.inner.db() + } +} + +impl Display for InnerTrieWrapper +where + D: Database, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.inner) + } +} diff --git a/lr_trie/src/lib.rs b/lr_trie/src/lib.rs new file mode 100644 index 0000000..7ef9a7d --- /dev/null +++ b/lr_trie/src/lib.rs @@ -0,0 +1,11 @@ +/// This crate contains a left-right wrapped, evmap-backed, Merkle-Patricia Trie +/// heavily inspired by https://github.com/carver/eth-trie.rs which is a fork of https://github.com/citahub/cita-trie +pub use patriecia::H256; + +mod inner; +mod inner_wrapper; +pub mod op; +mod result; +mod trie; + +pub use crate::{inner::*, inner_wrapper::*, op::*, result::*, trie::*}; diff --git a/lr_trie/src/op.rs b/lr_trie/src/op.rs new file mode 100644 index 0000000..b791726 --- /dev/null +++ b/lr_trie/src/op.rs @@ -0,0 +1,17 @@ +pub type Byte = u8; +pub type Bytes = [Byte]; +pub type Key = Vec; +pub type TrieValue = Vec; + +#[derive(Debug)] +#[non_exhaustive] +pub enum Operation { + /// Add a single value serialized to bytes + Add(Key, TrieValue), + + /// Remove a value specified by the key from the trie + Remove(Key), + /// Extend the state trie with the provided iterator over leaf values as + /// byte slices. + Extend(Vec<(Key, TrieValue)>), +} diff --git a/lr_trie/src/result.rs b/lr_trie/src/result.rs new file mode 100644 index 0000000..ba212d2 --- /dev/null +++ b/lr_trie/src/result.rs @@ -0,0 +1,22 @@ +use patriecia::error::TrieError; + +pub type Result = std::result::Result; + +#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)] +pub enum LeftRightTrieError { + #[error("failed to deserialize value")] + FailedToDeserializeValue, + + #[error("value not found for key {0:?}")] + #[deprecated] + NoValueForKey(Vec), + + #[error("value for key {0} not found")] + NotFound(String), + + #[error("trie error: {0}")] + FailedToGetValueForKey(TrieError), + + #[error("{0}")] + Other(String), +} diff --git a/lr_trie/src/trie.rs b/lr_trie/src/trie.rs new file mode 100644 index 0000000..0d1eea8 --- /dev/null +++ b/lr_trie/src/trie.rs @@ -0,0 +1,274 @@ +use std::{ + fmt::{self, Debug, Display, Formatter}, + marker::PhantomData, + sync::Arc, +}; + +pub use left_right::ReadHandleFactory; +use left_right::{ReadHandle, WriteHandle}; +use patriecia::{db::Database, inner::InnerTrie, H256}; +use serde::{Deserialize, Serialize}; + +use crate::{InnerTrieWrapper, LeftRightTrieError, Operation, Proof, Result}; + +/// Concurrent generic Merkle Patricia Trie +#[derive(Debug)] +pub struct LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + pub read_handle: ReadHandle>, + pub write_handle: WriteHandle, Operation>, + _marker: PhantomData<(K, V, &'a ())>, +} + +impl<'a, D, K, V> LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + pub fn new(db: Arc) -> Self { + let (write_handle, read_handle) = left_right::new_from_empty(InnerTrie::new(db)); + + Self { + read_handle, + write_handle, + _marker: PhantomData, + } + } + + pub fn handle(&self) -> InnerTrieWrapper { + let read_handle = self + .read_handle + .enter() + .map(|guard| guard.clone()) + .unwrap_or_default(); + + InnerTrieWrapper::new(read_handle) + } + + /// Returns a vector of all entries within the trie + pub fn entries(&self) -> Vec<(K, V)> { + todo!() + } + + pub fn len(&self) -> usize { + self.handle().iter().count() + } + + pub fn is_empty(&self) -> bool { + self.handle().len() == 0 + } + + pub fn root(&self) -> Option { + self.handle().root_hash().ok() + } + + pub fn get_proof(&mut self, key: &K) -> Result> + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + self.handle() + .get_proof::(key) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn verify_proof(&self, root: H256, key: &K, proof: Vec) -> Result> + where + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, + { + self.handle() + .verify_proof::(root, key, proof) + .map_err(|err| LeftRightTrieError::Other(err.to_string())) + } + + pub fn factory(&self) -> ReadHandleFactory> { + self.read_handle.factory() + } + + pub fn update(&mut self, key: K, value: V) { + self.insert(key, value); + } + + pub fn publish(&mut self) { + self.write_handle.publish(); + } + + pub fn insert(&mut self, key: K, value: V) { + //TODO: revisit the serializer used to store things on the trie + let key = bincode::serialize(&key).unwrap_or_default(); + let value = bincode::serialize(&value).unwrap_or_default(); + self.write_handle + .append(Operation::Add(key, value)) + .publish(); + } + + pub fn extend(&mut self, values: Vec<(K, V)>) { + let mapped = values + .into_iter() + .map(|(key, value)| { + //TODO: revisit the serializer used to store things on the trie + let key = bincode::serialize(&key).unwrap_or_default(); + let value = bincode::serialize(&value).unwrap_or_default(); + + (key, value) + }) + .collect(); + + self.write_handle + .append(Operation::Extend(mapped)) + .publish(); + } +} + +impl<'a, D, K, V> PartialEq for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn eq(&self, other: &Self) -> bool { + self.handle().root_hash() == other.handle().root_hash() + } +} + +impl<'a, D, K, V> Default for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn default() -> Self { + let (write_handle, read_handle) = left_right::new::, Operation>(); + Self { + read_handle, + write_handle, + _marker: PhantomData, + } + } +} + +impl<'a, D, K, V> From for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn from(db: D) -> Self { + let db = Arc::new(db); + let (write_handle, read_handle) = left_right::new_from_empty(InnerTrie::new(db)); + + Self { + read_handle, + write_handle, + _marker: PhantomData, + } + } +} + +impl<'a, D, K, V> From> for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn from(other: InnerTrie) -> Self { + let (write_handle, read_handle) = left_right::new_from_empty(other); + + Self { + read_handle, + write_handle, + _marker: PhantomData, + } + } +} + +impl<'a, D, K, V> Clone for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn clone(&self) -> Self { + let inner = self.handle().inner(); + LeftRightTrie::from(inner) + } +} + +impl<'a, D, K, V> Display for LeftRightTrie<'a, K, V, D> +where + D: Database, + K: Serialize + Deserialize<'a>, + V: Serialize + Deserialize<'a>, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.handle()) + } +} + +#[cfg(test)] +mod tests { + use std::thread; + + use patriecia::db::MemoryDB; + + use super::*; + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] + struct CustomValue { + pub data: usize, + } + + #[test] + fn should_store_arbitrary_values() { + let memdb = Arc::new(MemoryDB::new(true)); + let mut trie = LeftRightTrie::new(memdb); + + trie.insert("abcdefg", CustomValue { data: 100 }); + + let value: CustomValue = trie.handle().get(&String::from("abcdefg")).unwrap(); + + assert_eq!(value, CustomValue { data: 100 }); + } + + #[test] + fn should_be_read_concurrently() { + let memdb = Arc::new(MemoryDB::new(true)); + let mut trie = LeftRightTrie::new(memdb); + + let total = 18; + + for n in 0..total { + let key = format!("test-{n}"); + + trie.insert(key, CustomValue { data: 12345 }); + } + + trie.publish(); + + // NOTE Spawn 10 threads and 10 readers that should report the exact same value + [0..10] + .iter() + .map(|_| { + let reader = trie.handle(); + thread::spawn(move || { + assert_eq!(reader.len(), total); + for n in 0..total { + let key = format!("test-{n}"); + + let res: CustomValue = reader.get(&key).unwrap(); + + assert_eq!(res, CustomValue { data: 12345 }); + } + }) + }) + .for_each(|handle| { + handle.join().unwrap(); + }); + } +}