Patch summary (x/merkledb):
  * codec.go: add encodedDBNodeSize (plus the childSize, uintSize and keySize helpers)
    and use it to size the buffer in encodeDBNode instead of the estimated* constants.
  * node.go / trieview.go: drop the cached nodeBytes field and onNodeChanged; bytes()
    now re-encodes the dbNode on every call.
  * db.go: cacheEntrySize adds encodedDBNodeSize once instead of 2*len(n.bytes()).
  * codec_test.go: check the encoded length against encodedDBNodeSize; add TestUintSize.
Note: line breaks and indentation were restored from a whitespace-collapsed copy of
this patch. Column padding and nesting depth inside partial hunks are inferred, so
apply with `git apply --ignore-whitespace` or re-check against the upstream files.

diff --git a/x/merkledb/codec.go b/x/merkledb/codec.go
index c14534d9cead..ea6bc10363f4 100644
--- a/x/merkledb/codec.go
+++ b/x/merkledb/codec.go
@@ -29,11 +29,8 @@ const (
 	minDBNodeLen = minMaybeByteSliceLen + minVarIntLen
 	minChildLen = minVarIntLen + minKeyLen + ids.IDLen + boolLen
 
-	estimatedKeyLen           = 64
-	estimatedValueLen         = 64
-	estimatedCompressedKeyLen = 8
-	// Child index, child compressed key, child ID, child has value
-	estimatedNodeChildLen = minVarIntLen + estimatedCompressedKeyLen + ids.IDLen + boolLen
+	estimatedKeyLen   = 64
+	estimatedValueLen = 64
 	// Child index, child ID
 	hashValuesChildLen = minVarIntLen + ids.IDLen
 )
@@ -62,6 +59,7 @@ type encoderDecoder interface {
 type encoder interface {
 	// Assumes [n] is non-nil.
 	encodeDBNode(n *dbNode) []byte
+	encodedDBNodeSize(n *dbNode) int
 
 	// Returns the bytes that will be hashed to generate [n]'s ID.
 	// Assumes [n] is non-nil.
@@ -93,15 +91,51 @@ type codecImpl struct {
 	varIntPool sync.Pool
 }
 
+func (c *codecImpl) childSize(index byte, childEntry *child) int {
+	// * index
+	// * child ID
+	// * child key
+	// * bool indicating whether the child has a value
+	return c.uintSize(uint64(index)) + ids.IDLen + c.keySize(childEntry.compressedKey) + boolLen
+}
+
+// based on the current implementation of codecImpl.encodeUint which uses binary.PutUvarint
+func (*codecImpl) uintSize(value uint64) int {
+	// binary.PutUvarint repeatedly divides by 128 until the value is under 128,
+	// so count the number of times that will occur
+	i := 0
+	for value >= 0x80 {
+		value >>= 7
+		i++
+	}
+	return i + 1
+}
+
+func (c *codecImpl) keySize(p Key) int {
+	return c.uintSize(uint64(p.length)) + bytesNeeded(p.length)
+}
+
+func (c *codecImpl) encodedDBNodeSize(n *dbNode) int {
+	// * number of children
+	// * bool indicating whether [n] has a value
+	// * the value (optional)
+	// * children
+	size := c.uintSize(uint64(len(n.children))) + boolLen
+	if n.value.HasValue() {
+		valueLen := len(n.value.Value())
+		size += c.uintSize(uint64(valueLen)) + valueLen
+	}
+	// for each non-nil entry, we add the additional size of the child entry
+	for index, entry := range n.children {
+		size += c.childSize(index, entry)
+	}
+	return size
+}
+
 func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
-	var (
-		numChildren = len(n.children)
-		// Estimate size of [n] to prevent memory allocations
-		estimatedLen = estimatedValueLen + minVarIntLen + estimatedNodeChildLen*numChildren
-		buf          = bytes.NewBuffer(make([]byte, 0, estimatedLen))
-	)
+	buf := bytes.NewBuffer(make([]byte, 0, c.encodedDBNodeSize(n)))
 	c.encodeMaybeByteSlice(buf, n.value)
-	c.encodeUint(buf, uint64(numChildren))
+	c.encodeUint(buf, uint64(len(n.children)))
 	// Note we insert children in order of increasing index
 	// for determinism.
 	keys := maps.Keys(n.children)
diff --git a/x/merkledb/codec_test.go b/x/merkledb/codec_test.go
index 1f463ca50858..991368a823f4 100644
--- a/x/merkledb/codec_test.go
+++ b/x/merkledb/codec_test.go
@@ -158,7 +158,7 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {
 				}
 
 				nodeBytes := codec.encodeDBNode(&node)
-
+				require.Len(nodeBytes, codec.encodedDBNodeSize(&node))
 				var gotNode dbNode
 				require.NoError(codec.decodeDBNode(nodeBytes, &gotNode))
 				require.Equal(node, gotNode)
@@ -244,3 +244,12 @@ func TestCodecDecodeKeyLengthOverflowRegression(t *testing.T) {
 	_, err := codec.decodeKey(binary.AppendUvarint(nil, math.MaxInt))
 	require.ErrorIs(t, err, io.ErrUnexpectedEOF)
 }
+
+func TestUintSize(t *testing.T) {
+	c := codec.(*codecImpl)
+	for i := uint64(0); i < math.MaxInt16; i++ {
+		expectedSize := c.uintSize(i)
+		actualSize := binary.PutUvarint(make([]byte, binary.MaxVarintLen64), i)
+		require.Equal(t, expectedSize, actualSize, i)
+	}
+}
diff --git a/x/merkledb/db.go b/x/merkledb/db.go
index 5b046c8bb834..d26d56d6a9f9 100644
--- a/x/merkledb/db.go
+++ b/x/merkledb/db.go
@@ -1357,6 +1357,5 @@ func cacheEntrySize(key Key, n *node) int {
 	if n == nil {
 		return cacheEntryOverHead + len(key.Bytes())
 	}
-	// nodes cache their bytes representation so the total memory consumed is roughly twice that
-	return cacheEntryOverHead + len(key.Bytes()) + 2*len(n.bytes())
+	return cacheEntryOverHead + len(key.Bytes()) + codec.encodedDBNodeSize(&n.dbNode)
 }
diff --git a/x/merkledb/node.go b/x/merkledb/node.go
index d15eb6ae7e14..4caad76f294a 100644
--- a/x/merkledb/node.go
+++ b/x/merkledb/node.go
@@ -29,7 +29,6 @@ type child struct {
 type node struct {
 	dbNode
 	key         Key
-	nodeBytes   []byte
 	valueDigest maybe.Maybe[[]byte]
 }
 
@@ -50,9 +49,8 @@ func parseNode(key Key, nodeBytes []byte) (*node, error) {
 		return nil, err
 	}
 	result := &node{
-		dbNode:    n,
-		key:       key,
-		nodeBytes: nodeBytes,
+		dbNode: n,
+		key:    key,
 	}
 
 	result.setValueDigest()
@@ -66,17 +64,7 @@ func (n *node) hasValue() bool {
 
 // Returns the byte representation of this node.
 func (n *node) bytes() []byte {
-	if n.nodeBytes == nil {
-		n.nodeBytes = codec.encodeDBNode(&n.dbNode)
-	}
-
-	return n.nodeBytes
-}
-
-// clear the cached values that will need to be recalculated whenever the node changes
-// for example, node ID and byte representation
-func (n *node) onNodeChanged() {
-	n.nodeBytes = nil
+	return codec.encodeDBNode(&n.dbNode)
 }
 
 // Returns and caches the ID of this node.
@@ -88,7 +76,6 @@ func (n *node) calculateID(metrics merkleMetrics) ids.ID {
 
 // Set [n]'s value to [val].
 func (n *node) setValue(val maybe.Maybe[[]byte]) {
-	n.onNodeChanged()
 	n.value = val
 	n.setValueDigest()
 }
@@ -121,13 +108,11 @@ func (n *node) addChildWithID(childNode *node, tokenSize int, childID ids.ID) {
 
 // Adds a child to [n] without a reference to the child node.
 func (n *node) setChildEntry(index byte, childEntry *child) {
-	n.onNodeChanged()
 	n.children[index] = childEntry
 }
 
 // Removes [child] from [n]'s children.
 func (n *node) removeChild(child *node, tokenSize int) {
-	n.onNodeChanged()
 	delete(n.children, child.key.Token(n.key.length, tokenSize))
 }
 
@@ -143,7 +128,6 @@ func (n *node) clone() *node {
 			children: make(map[byte]*child, len(n.children)),
 		},
 		valueDigest: n.valueDigest,
-		nodeBytes:   n.nodeBytes,
 	}
 	for key, existing := range n.children {
 		result.children[key] = &child{
diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go
index 35be62f8a5f9..12a17e7bbbac 100644
--- a/x/merkledb/trieview.go
+++ b/x/merkledb/trieview.go
@@ -278,7 +278,6 @@ func (t *trieView) calculateNodeIDsHelper(n *node) ids.ID {
 			// This child wasn't changed.
 			continue
 		}
-		n.onNodeChanged()
 		childEntry.hasValue = childNodeChange.after.hasValue()
 
 		// Try updating the child and its descendants in a goroutine.