diff --git a/Cargo.toml b/Cargo.toml index 1d898c9..db87c09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ ssz_types = "0.8.0" proptest = "1.0.0" tree_hash_derive = "0.8.0" criterion = "0.5" +dhat = "0.3.3" [features] debug = [] @@ -52,3 +53,6 @@ harness = false [[bench]] name = "pop_front" harness = false + +[profile.bench] +debug = true diff --git a/src/lib.rs b/src/lib.rs index f093028..ce30bcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ pub mod iter; pub mod leaf; pub mod level_iter; pub mod list; +pub mod mem; pub mod packed_leaf; mod repeat; pub mod serde; diff --git a/src/list.rs b/src/list.rs index 262e1ea..904321a 100644 --- a/src/list.rs +++ b/src/list.rs @@ -336,13 +336,13 @@ impl> List { } } -impl Default for List { +impl> Default for List { fn default() -> Self { Self::empty() } } -impl TreeHash for List { +impl> TreeHash for List { fn tree_hash_type() -> tree_hash::TreeHashType { tree_hash::TreeHashType::List } @@ -404,7 +404,7 @@ where } // FIXME: duplicated from `ssz::encode::impl_for_vec` -impl Encode for List { +impl> Encode for List { fn is_ssz_fixed_len() -> bool { false } @@ -438,10 +438,11 @@ impl Encode for List { } } -impl TryFromIter for List +impl TryFromIter for List where T: Value, N: Unsigned, + U: UpdateMap, { type Error = Error; @@ -453,10 +454,11 @@ where } } -impl Decode for List +impl Decode for List where T: Value, N: Unsigned, + U: UpdateMap, { fn is_ssz_fixed_len() -> bool { false diff --git a/src/mem.rs b/src/mem.rs new file mode 100644 index 0000000..0e3834c --- /dev/null +++ b/src/mem.rs @@ -0,0 +1,131 @@ +use crate::{Arc, List, Tree, UpdateMap, Value, Vector}; +use std::collections::HashMap; +use typenum::Unsigned; + +pub trait MemorySize { + /// The memory address of this item. + fn self_pointer(&self) -> usize; + + /// Subtrees (Arcs) for this type's fields that consume memory. + fn subtrees(&self) -> Vec<&dyn MemorySize>; + + /// Memory consumed by this type's non-recursive fields. 
+ fn intrinsic_size(&self) -> usize; +} + +/// Memory usage (RAM) analysis for Milhouse data structures. +#[derive(Default, Clone)] +pub struct MemoryTracker { + // Map from pointer to size of subtree referenced by that pointer. + subtree_sizes: HashMap, + // Total size of all tracked items, accounting for de-duplication. + total_size: usize, +} + +#[derive(Debug)] +pub struct ItemStats { + /// Total size of this item ignoring structural sharing. + pub total_size: usize, + /// Amount of memory used by this item in addition to memory that was already tracked. + pub differential_size: usize, +} + +impl MemoryTracker { + pub fn track_item(&mut self, item: &T) -> ItemStats { + let ptr = item.self_pointer(); + + // If this item is already tracked, then its differential size is 0. + if let Some(&total_size) = self.subtree_sizes.get(&ptr) { + return ItemStats { + total_size, + differential_size: 0, + }; + } + + // Otherwise, calculate the intrinsic size of this item, and recurse into its subtrees. + let intrinsic_size = item.intrinsic_size(); + + let subtrees = item.subtrees(); + + let mut total_size = intrinsic_size; + let mut differential_size = intrinsic_size; + + for subtree in subtrees { + let subtree_stats = self.track_item(subtree); + total_size += subtree_stats.total_size; + differential_size += subtree_stats.differential_size; + } + + self.subtree_sizes.insert(ptr, total_size); + self.total_size += intrinsic_size; + + ItemStats { + total_size, + differential_size, + } + } + + pub fn total_size(&self) -> usize { + self.total_size + } +} + +impl MemorySize for Arc> { + fn self_pointer(&self) -> usize { + self.as_ptr() as usize + } + + fn subtrees(&self) -> Vec<&dyn MemorySize> { + match &**self { + Tree::Leaf(_) | Tree::PackedLeaf(_) | Tree::Zero(_) => vec![], + Tree::Node { left, right, .. } => { + vec![left, right] + } + } + } + + fn intrinsic_size(&self) -> usize { + let leaf_size = match &**self { + // This is the T allocated behind the Arc in `Leaf::value`. 
+ Tree::Leaf(_) => std::mem::size_of::(), + // This is the Vec allocated inside `PackedLeaf::values`. + Tree::PackedLeaf(packed) => packed.values.capacity() * std::mem::size_of::(), + Tree::Node { .. } | Tree::Zero(..) => 0, + }; + std::mem::size_of::() + std::mem::size_of::>() + leaf_size + } +} + +impl> MemorySize for List { + fn self_pointer(&self) -> usize { + self as *const _ as usize + } + + fn subtrees(&self) -> Vec<&dyn MemorySize> { + vec![&self.interface.backing.tree] + } + + fn intrinsic_size(&self) -> usize { + // This approximates the size of the UpdateMap, and assumes that `T` is not recursive. + // We could probably add a `T: MemorySize` bound? In most practical cases the update map + // should be empty anyway. + std::mem::size_of::() + self.interface.updates.len() * std::mem::size_of::() + } +} + +impl> MemorySize for Vector { + fn self_pointer(&self) -> usize { + self as *const _ as usize + } + + fn subtrees(&self) -> Vec<&dyn MemorySize> { + vec![&self.interface.backing.tree] + } + + fn intrinsic_size(&self) -> usize { + // This approximates the size of the UpdateMap, and assumes that `T` is not recursive. + // We could probably add a `T: MemorySize` bound? In most practical cases the update map + // should be empty anyway. 
+ std::mem::size_of::() + self.interface.updates.len() * std::mem::size_of::() + } +} diff --git a/src/packed_leaf.rs b/src/packed_leaf.rs index 512f3ef..9aaf42e 100644 --- a/src/packed_leaf.rs +++ b/src/packed_leaf.rs @@ -11,7 +11,7 @@ pub struct PackedLeaf { #[educe(PartialEq(ignore), Hash(ignore))] #[arbitrary(with = arb_rwlock)] pub hash: RwLock, - pub(crate) values: Vec, + pub values: Vec, } impl Clone for PackedLeaf diff --git a/src/tests/mem.rs b/src/tests/mem.rs new file mode 100644 index 0000000..642191d --- /dev/null +++ b/src/tests/mem.rs @@ -0,0 +1,23 @@ +use crate::{mem::MemoryTracker, Vector}; +use typenum::U1024; + +#[test] +fn vector_mutate_last() { + let v1 = Vector::::new(vec![1; 1024]).unwrap(); + let mut v2 = v1.clone(); + *v2.get_mut(1023).unwrap() = 2; + v2.apply_updates().unwrap(); + + let mut tracker = MemoryTracker::default(); + let v1_stats = tracker.track_item(&v1); + let v2_stats = tracker.track_item(&v2); + + // Total size is equal. + assert_eq!(v1_stats.total_size, v2_stats.total_size); + + // Differential size for v1 is equal to its total size (nothing to diff against). + assert_eq!(v1_stats.total_size, v1_stats.differential_size); + + // The differential size of the second vector should be less than 2% of the total size. 
+ assert!(50 * v2_stats.differential_size < v2_stats.total_size); +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 677a071..4af2e67 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -2,6 +2,7 @@ mod builder; mod iterator; +mod mem; mod packed; mod pop_front; mod proptest; diff --git a/src/vector.rs b/src/vector.rs index 08544d1..cd6a103 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -238,7 +238,7 @@ where } } -impl Default for Vector { +impl> Default for Vector { fn default() -> Self { Self::from_elem(T::default()).unwrap_or_else(|e| { panic!( @@ -250,7 +250,7 @@ impl Default for Vector { } } -impl tree_hash::TreeHash for Vector { +impl> tree_hash::TreeHash for Vector { fn tree_hash_type() -> tree_hash::TreeHashType { tree_hash::TreeHashType::Vector } @@ -270,10 +270,11 @@ impl tree_hash::TreeHash for Vector { } } -impl TryFromIter for Vector +impl TryFromIter for Vector where T: Value, N: Unsigned, + U: UpdateMap, { type Error = Error; @@ -295,7 +296,7 @@ impl<'a, T: Value, N: Unsigned, U: UpdateMap> IntoIterator for &'a Vector Encode for Vector { +impl> Encode for Vector { fn is_ssz_fixed_len() -> bool { ::is_ssz_fixed_len() } @@ -337,7 +338,7 @@ impl Encode for Vector { } } -impl Decode for Vector { +impl> Decode for Vector { fn is_ssz_fixed_len() -> bool { ::is_ssz_fixed_len() } diff --git a/tests/mem_list.rs b/tests/mem_list.rs new file mode 100644 index 0000000..f198b9a --- /dev/null +++ b/tests/mem_list.rs @@ -0,0 +1,32 @@ +use milhouse::{mem::MemoryTracker, List}; +use typenum::U1024; + +#[global_allocator] +static ALLOC: dhat::Alloc = dhat::Alloc; + +#[test] +fn memory_tracker_accuracy() { + let _profiler = dhat::Profiler::builder().testing().build(); + + // Take a snapshot at the start so we can ignore "background allocations" from e.g. the test + // runner and the process starting up. 
+ let pre_stats = dhat::HeapStats::get(); + + // We box the list because the MemorySize implementation for List includes the fields of the + // list, and we want to allocate them on the heap so that they are visible to DHAT. + let list = Box::new(List::::new(vec![1; 1024]).unwrap()); + + // Calculate the size of the list using Milhouse tools, and then drop the tracker so it isn't + // consuming any heap space (which would interfere with our measurements). + let mut mem_tracker = MemoryTracker::default(); + let stats = mem_tracker.track_item(&*list); + assert_eq!(stats.total_size, mem_tracker.total_size()); + drop(mem_tracker); + + // Calculate total size according to DHAT by subtracting the starting allocations from the + // current amount allocated. + let post_stats = dhat::HeapStats::get(); + let dhat_total_size = post_stats.curr_bytes - pre_stats.curr_bytes; + + dhat::assert_eq!(dhat_total_size, stats.total_size); +}