Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add differential memory usage tracking #51

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ ssz_types = "0.8.0"
proptest = "1.0.0"
tree_hash_derive = "0.8.0"
criterion = "0.5"
dhat = "0.3.3"

[features]
debug = []
Expand All @@ -52,3 +53,6 @@ harness = false
[[bench]]
name = "pop_front"
harness = false

[profile.bench]
debug = true
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub mod iter;
pub mod leaf;
pub mod level_iter;
pub mod list;
pub mod mem;
pub mod packed_leaf;
mod repeat;
pub mod serde;
Expand Down
12 changes: 7 additions & 5 deletions src/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,13 @@ impl<T: Value, N: Unsigned, U: UpdateMap<T>> List<T, N, U> {
}
}

impl<T: Value, N: Unsigned> Default for List<T, N> {
impl<T: Value, N: Unsigned, U: UpdateMap<T>> Default for List<T, N, U> {
fn default() -> Self {
Self::empty()
}
}

impl<T: Value + Send + Sync, N: Unsigned> TreeHash for List<T, N> {
impl<T: Value + Send + Sync, N: Unsigned, U: UpdateMap<T>> TreeHash for List<T, N, U> {
fn tree_hash_type() -> tree_hash::TreeHashType {
tree_hash::TreeHashType::List
}
Expand Down Expand Up @@ -404,7 +404,7 @@ where
}

// FIXME: duplicated from `ssz::encode::impl_for_vec`
impl<T: Value, N: Unsigned> Encode for List<T, N> {
impl<T: Value, N: Unsigned, U: UpdateMap<T>> Encode for List<T, N, U> {
fn is_ssz_fixed_len() -> bool {
false
}
Expand Down Expand Up @@ -438,10 +438,11 @@ impl<T: Value, N: Unsigned> Encode for List<T, N> {
}
}

impl<T, N> TryFromIter<T> for List<T, N>
impl<T, N, U> TryFromIter<T> for List<T, N, U>
where
T: Value,
N: Unsigned,
U: UpdateMap<T>,
{
type Error = Error;

Expand All @@ -453,10 +454,11 @@ where
}
}

impl<T, N> Decode for List<T, N>
impl<T, N, U> Decode for List<T, N, U>
where
T: Value,
N: Unsigned,
U: UpdateMap<T>,
{
fn is_ssz_fixed_len() -> bool {
false
Expand Down
131 changes: 131 additions & 0 deletions src/mem.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
use crate::{Arc, List, Tree, UpdateMap, Value, Vector};
use std::collections::HashMap;
use typenum::Unsigned;

/// Describes how much memory a value occupies so that a `MemoryTracker` can measure it.
///
/// A value is modelled as an address, an intrinsic size, and a list of subtrees which are
/// measured recursively by `MemoryTracker::track_item`.
pub trait MemorySize {
    /// The memory address of this item.
    ///
    /// `MemoryTracker::track_item` uses this value as its de-duplication key: an item whose
    /// pointer has already been tracked is reported with a differential size of 0.
    fn self_pointer(&self) -> usize;

    /// Subtrees (Arcs) for this type's fields that consume memory.
    ///
    /// Every subtree returned here is itself passed to `MemoryTracker::track_item`, and its
    /// sizes are added to those of this item.
    fn subtrees(&self) -> Vec<&dyn MemorySize>;

    /// Memory consumed by this type's non-recursive fields.
    ///
    /// Memory reachable through `subtrees` is added separately by the tracker.
    fn intrinsic_size(&self) -> usize;
}

/// Memory usage (RAM) analysis for Milhouse data structures.
///
/// Items are registered with `track_item`. The tracker remembers the pointer of every item
/// (and subtree) it has seen, so memory shared between several tracked items is only counted
/// once in `total_size`.
#[derive(Debug, Default, Clone)]
pub struct MemoryTracker {
    /// Map from pointer to size of subtree referenced by that pointer.
    subtree_sizes: HashMap<usize, usize>,
    /// Total size of all tracked items, accounting for de-duplication.
    total_size: usize,
}

/// Sizes reported by `MemoryTracker::track_item` for a single item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ItemStats {
    /// Total size of this item ignoring structural sharing.
    pub total_size: usize,
    /// Amount of memory used by this item in addition to memory that was already tracked.
    pub differential_size: usize,
}

impl MemoryTracker {
    /// Measure `item` and register it, together with every subtree not seen before.
    ///
    /// The returned `ItemStats` holds the item's full size (its intrinsic size plus the full
    /// size of each subtree) and its differential size (only the parts that this tracker had
    /// not recorded yet). An item whose pointer is already known yields its cached full size
    /// and a differential size of 0.
    pub fn track_item<T: MemorySize + ?Sized>(&mut self, item: &T) -> ItemStats {
        let key = item.self_pointer();

        // Already recorded: nothing new is consumed, so answer from the cache.
        if let Some(&cached_total) = self.subtree_sizes.get(&key) {
            return ItemStats {
                total_size: cached_total,
                differential_size: 0,
            };
        }

        // First sighting: start from the item's own footprint and add each subtree's stats.
        let own_size = item.intrinsic_size();

        let (total_size, differential_size) = item.subtrees().into_iter().fold(
            (own_size, own_size),
            |(total, differential), child| {
                let child_stats = self.track_item(child);
                (
                    total + child_stats.total_size,
                    differential + child_stats.differential_size,
                )
            },
        );

        // Subtrees have added their own intrinsic sizes during recursion, so only this item's
        // own footprint is added to the running, de-duplicated total.
        self.subtree_sizes.insert(key, total_size);
        self.total_size += own_size;

        ItemStats {
            total_size,
            differential_size,
        }
    }

    /// Combined size of everything tracked so far, with shared memory counted once.
    pub fn total_size(&self) -> usize {
        self.total_size
    }
}

impl<T: Value> MemorySize for Arc<Tree<T>> {
    /// The address returned by `as_ptr` for this `Arc`, as an integer.
    fn self_pointer(&self) -> usize {
        let node_ptr = self.as_ptr();
        node_ptr as usize
    }

    /// Internal nodes expose their two children; every other variant has no subtrees.
    fn subtrees(&self) -> Vec<&dyn MemorySize> {
        match &**self {
            Tree::Node { left, right, .. } => vec![left, right],
            Tree::Leaf(_) | Tree::PackedLeaf(_) | Tree::Zero(_) => Vec::new(),
        }
    }

    /// Size of the `Arc` handle plus the `Tree` value plus any variant-specific payload.
    fn intrinsic_size(&self) -> usize {
        let handle_size = std::mem::size_of::<Self>();
        let node_size = std::mem::size_of::<Tree<T>>();

        let payload_size = match &**self {
            // Nothing extra is counted for these variants.
            Tree::Node { .. } | Tree::Zero(..) => 0,
            // The T allocated behind the Arc in `Leaf::value`.
            Tree::Leaf(_) => std::mem::size_of::<T>(),
            // The Vec<T> allocated inside `PackedLeaf::values`, counted by capacity.
            Tree::PackedLeaf(packed) => packed.values.capacity() * std::mem::size_of::<T>(),
        };

        handle_size + node_size + payload_size
    }
}

impl<T: Value, N: Unsigned, U: UpdateMap<T>> MemorySize for List<T, N, U> {
    /// The address of the `List` value itself, as an integer.
    fn self_pointer(&self) -> usize {
        let this: *const Self = self;
        this as usize
    }

    /// The only subtree is the backing tree held by the list's interface.
    fn subtrees(&self) -> Vec<&dyn MemorySize> {
        let root: &dyn MemorySize = &self.interface.backing.tree;
        vec![root]
    }

    /// Size of the `List` struct plus an estimate for its pending updates.
    fn intrinsic_size(&self) -> usize {
        // The UpdateMap is only approximated: one `T` per entry, assuming `T` is not
        // recursive. A `T: MemorySize` bound could probably refine this, but in most practical
        // cases the update map should be empty anyway.
        let pending_updates = self.interface.updates.len();
        let update_bytes = pending_updates * std::mem::size_of::<T>();
        std::mem::size_of::<Self>() + update_bytes
    }
}

impl<T: Value, N: Unsigned, U: UpdateMap<T>> MemorySize for Vector<T, N, U> {
    /// The address of the `Vector` value itself, as an integer.
    fn self_pointer(&self) -> usize {
        let this: *const Self = self;
        this as usize
    }

    /// The only subtree is the backing tree held by the vector's interface.
    fn subtrees(&self) -> Vec<&dyn MemorySize> {
        let root: &dyn MemorySize = &self.interface.backing.tree;
        vec![root]
    }

    /// Size of the `Vector` struct plus an estimate for its pending updates.
    fn intrinsic_size(&self) -> usize {
        // The UpdateMap is only approximated: one `T` per entry, assuming `T` is not
        // recursive. A `T: MemorySize` bound could probably refine this, but in most practical
        // cases the update map should be empty anyway.
        let pending_updates = self.interface.updates.len();
        let update_bytes = pending_updates * std::mem::size_of::<T>();
        std::mem::size_of::<Self>() + update_bytes
    }
}
2 changes: 1 addition & 1 deletion src/packed_leaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub struct PackedLeaf<T: TreeHash + Clone> {
#[educe(PartialEq(ignore), Hash(ignore))]
#[arbitrary(with = arb_rwlock)]
pub hash: RwLock<Hash256>,
pub(crate) values: Vec<T>,
pub values: Vec<T>,
}

impl<T> Clone for PackedLeaf<T>
Expand Down
23 changes: 23 additions & 0 deletions src/tests/mem.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use crate::{mem::MemoryTracker, Vector};
use typenum::U1024;

#[test]
fn vector_mutate_last() {
    // Build a full vector, then a clone that differs only in its final element.
    let base = Vector::<u64, U1024>::new(vec![1; 1024]).unwrap();
    let mut mutated = base.clone();
    *mutated.get_mut(1023).unwrap() = 2;
    mutated.apply_updates().unwrap();

    // Track the base vector first so the clone is measured relative to it.
    let mut tracker = MemoryTracker::default();
    let v1_stats = tracker.track_item(&base);
    let v2_stats = tracker.track_item(&mutated);

    // Both vectors have the same total size.
    assert_eq!(v1_stats.total_size, v2_stats.total_size);

    // The first vector had nothing to diff against, so all of it counts as new.
    assert_eq!(v1_stats.total_size, v1_stats.differential_size);

    // The differential size of the second vector should be less than 2% of its total size.
    assert!(50 * v2_stats.differential_size < v2_stats.total_size);
}
1 change: 1 addition & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

mod builder;
mod iterator;
mod mem;
mod packed;
mod pop_front;
mod proptest;
Expand Down
11 changes: 6 additions & 5 deletions src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ where
}
}

impl<T: Default + Value, N: Unsigned> Default for Vector<T, N> {
impl<T: Default + Value, N: Unsigned, U: UpdateMap<T>> Default for Vector<T, N, U> {
fn default() -> Self {
Self::from_elem(T::default()).unwrap_or_else(|e| {
panic!(
Expand All @@ -250,7 +250,7 @@ impl<T: Default + Value, N: Unsigned> Default for Vector<T, N> {
}
}

impl<T: Value + Send + Sync, N: Unsigned> tree_hash::TreeHash for Vector<T, N> {
impl<T: Value + Send + Sync, N: Unsigned, U: UpdateMap<T>> tree_hash::TreeHash for Vector<T, N, U> {
fn tree_hash_type() -> tree_hash::TreeHashType {
tree_hash::TreeHashType::Vector
}
Expand All @@ -270,10 +270,11 @@ impl<T: Value + Send + Sync, N: Unsigned> tree_hash::TreeHash for Vector<T, N> {
}
}

impl<T, N> TryFromIter<T> for Vector<T, N>
impl<T, N, U> TryFromIter<T> for Vector<T, N, U>
where
T: Value,
N: Unsigned,
U: UpdateMap<T>,
{
type Error = Error;

Expand All @@ -295,7 +296,7 @@ impl<'a, T: Value, N: Unsigned, U: UpdateMap<T>> IntoIterator for &'a Vector<T,
}

// FIXME: duplicated from `ssz::encode::impl_for_vec`
impl<T: Value, N: Unsigned> Encode for Vector<T, N> {
impl<T: Value, N: Unsigned, U: UpdateMap<T>> Encode for Vector<T, N, U> {
fn is_ssz_fixed_len() -> bool {
<T as Encode>::is_ssz_fixed_len()
}
Expand Down Expand Up @@ -337,7 +338,7 @@ impl<T: Value, N: Unsigned> Encode for Vector<T, N> {
}
}

impl<T: Value, N: Unsigned> Decode for Vector<T, N> {
impl<T: Value, N: Unsigned, U: UpdateMap<T>> Decode for Vector<T, N, U> {
fn is_ssz_fixed_len() -> bool {
<T as Decode>::is_ssz_fixed_len()
}
Expand Down
32 changes: 32 additions & 0 deletions tests/mem_list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use milhouse::{mem::MemoryTracker, List};
use typenum::U1024;

// Install DHAT's allocator as the global allocator for this test binary; the test below reads
// heap statistics through `dhat::HeapStats`.
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

#[test]
fn memory_tracker_accuracy() {
let _profiler = dhat::Profiler::builder().testing().build();

// Take a snapshot at the start so we can ignore "background allocations" from e.g. the test
// runner and the process starting up.
let pre_stats = dhat::HeapStats::get();

// We box the list because the MemorySize implementation for List includes the fields of the
// list, and we want to allocate them on the heap so that they are visible to DHAT.
let list = Box::new(List::<u64, U1024>::new(vec![1; 1024]).unwrap());

// Calculate the size of the list using Milhouse tools, and then drop the tracker so it isn't
// consuming any heap space (which would interfere with our measurements).
let mut mem_tracker = MemoryTracker::default();
let stats = mem_tracker.track_item(&*list);
assert_eq!(stats.total_size, mem_tracker.total_size());
drop(mem_tracker);

// Calculate total size according to DHAT by subtracting the starting allocations from the
// current amount allocated.
let post_stats = dhat::HeapStats::get();
let dhat_total_size = post_stats.curr_bytes - pre_stats.curr_bytes;

dhat::assert_eq!(dhat_total_size, stats.total_size);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really neat test! Have you tried nested Lists?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, because I know it won't work 😳

}