Skip to content

Commit

Permalink
Remove cached node bytes from merkle nodes (#2393)
Browse files Browse the repository at this point in the history
Signed-off-by: David Boehm <[email protected]>
Co-authored-by: Dan Laine <[email protected]>
  • Loading branch information
dboehm-avalabs and Dan Laine authored Dec 14, 2023
1 parent cf5e869 commit 4909a20
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 35 deletions.
58 changes: 46 additions & 12 deletions x/merkledb/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ const (
minDBNodeLen = minMaybeByteSliceLen + minVarIntLen
minChildLen = minVarIntLen + minKeyLen + ids.IDLen + boolLen

estimatedKeyLen = 64
estimatedValueLen = 64
estimatedCompressedKeyLen = 8
// Child index, child compressed key, child ID, child has value
estimatedNodeChildLen = minVarIntLen + estimatedCompressedKeyLen + ids.IDLen + boolLen
estimatedKeyLen = 64
estimatedValueLen = 64
// Child index, child ID
hashValuesChildLen = minVarIntLen + ids.IDLen
)
Expand Down Expand Up @@ -62,6 +59,7 @@ type encoderDecoder interface {
type encoder interface {
// Assumes [n] is non-nil.
encodeDBNode(n *dbNode) []byte
encodedDBNodeSize(n *dbNode) int

// Returns the bytes that will be hashed to generate [n]'s ID.
// Assumes [n] is non-nil.
Expand Down Expand Up @@ -93,15 +91,51 @@ type codecImpl struct {
varIntPool sync.Pool
}

// childSize returns the number of bytes contributed by a single child entry
// stored at [index]. Assumes [childEntry] is non-nil.
func (c *codecImpl) childSize(index byte, childEntry *child) int {
	var (
		// Variable-length parts: the child's index and its compressed key.
		indexLen = c.uintSize(uint64(index))
		keyLen   = c.keySize(childEntry.compressedKey)
	)
	// Fixed-length parts: the child ID and the bool indicating whether the
	// child has a value.
	return indexLen + ids.IDLen + keyLen + boolLen
}

// uintSize returns the number of bytes used to encode [value].
// It is based on the current implementation of codecImpl.encodeUint, which
// uses binary.PutUvarint.
func (*codecImpl) uintSize(value uint64) int {
	// Every value takes at least one byte. Each further byte is needed for as
	// long as the remaining value does not fit in 7 bits (i.e. is >= 0x80).
	size := 1
	for ; value >= 0x80; value >>= 7 {
		size++
	}
	return size
}

// keySize returns the number of bytes contributed by [p]: the size of its
// length encoded as an unsigned integer plus bytesNeeded(p.length).
// NOTE(review): bytesNeeded is presumably the byte count required to hold
// p.length units of key data; confirm against its definition.
func (c *codecImpl) keySize(p Key) int {
	lengthPrefixLen := c.uintSize(uint64(p.length))
	return lengthPrefixLen + bytesNeeded(p.length)
}

// encodedDBNodeSize returns the size in bytes of [n], computed as the sum of:
//   - the bool indicating whether [n] has a value
//   - the value's length prefix and the value itself, if present
//   - the number of children
//   - every child entry
//
// Assumes [n] is non-nil and that every entry in [n.children] is non-nil
// (childSize dereferences each entry).
func (c *codecImpl) encodedDBNodeSize(n *dbNode) int {
	total := boolLen + c.uintSize(uint64(len(n.children)))

	if n.value.HasValue() {
		val := n.value.Value()
		total += c.uintSize(uint64(len(val))) + len(val)
	}

	// Map iteration order is irrelevant here since we only accumulate a sum.
	for childIndex, childEntry := range n.children {
		total += c.childSize(childIndex, childEntry)
	}
	return total
}

func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
var (
numChildren = len(n.children)
// Estimate size of [n] to prevent memory allocations
estimatedLen = estimatedValueLen + minVarIntLen + estimatedNodeChildLen*numChildren
buf = bytes.NewBuffer(make([]byte, 0, estimatedLen))
)
buf := bytes.NewBuffer(make([]byte, 0, c.encodedDBNodeSize(n)))
c.encodeMaybeByteSlice(buf, n.value)
c.encodeUint(buf, uint64(numChildren))
c.encodeUint(buf, uint64(len(n.children)))
// Note we insert children in order of increasing index
// for determinism.
keys := maps.Keys(n.children)
Expand Down
11 changes: 10 additions & 1 deletion x/merkledb/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {
}

nodeBytes := codec.encodeDBNode(&node)

require.Len(nodeBytes, codec.encodedDBNodeSize(&node))
var gotNode dbNode
require.NoError(codec.decodeDBNode(nodeBytes, &gotNode))
require.Equal(node, gotNode)
Expand Down Expand Up @@ -244,3 +244,12 @@ func TestCodecDecodeKeyLengthOverflowRegression(t *testing.T) {
_, err := codec.decodeKey(binary.AppendUvarint(nil, math.MaxInt))
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
}

// TestUintSize checks that codecImpl.uintSize reports exactly the number of
// bytes binary.PutUvarint writes for the same value.
func TestUintSize(t *testing.T) {
	c := codec.(*codecImpl)

	// Scratch buffer reused across all checks; PutUvarint never writes more
	// than binary.MaxVarintLen64 bytes.
	buf := make([]byte, binary.MaxVarintLen64)

	// Exhaustively cover the small values (1-, 2- and 3-byte encodings).
	for i := uint64(0); i < math.MaxInt16; i++ {
		expectedSize := binary.PutUvarint(buf, i)
		actualSize := c.uintSize(i)
		require.Equal(t, expectedSize, actualSize, i)
	}

	// Cover both sides of every 7-bit boundary so that all encoded lengths
	// up to binary.MaxVarintLen64 are exercised, not just the first three.
	for shift := 7; shift < 64; shift += 7 {
		boundary := uint64(1) << shift
		for _, value := range []uint64{boundary - 1, boundary, boundary + 1} {
			expectedSize := binary.PutUvarint(buf, value)
			actualSize := c.uintSize(value)
			require.Equal(t, expectedSize, actualSize, value)
		}
	}

	// The largest representable value must also agree.
	maxValue := uint64(math.MaxUint64)
	require.Equal(t, binary.PutUvarint(buf, maxValue), c.uintSize(maxValue), maxValue)
}
3 changes: 1 addition & 2 deletions x/merkledb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1357,6 +1357,5 @@ func cacheEntrySize(key Key, n *node) int {
if n == nil {
return cacheEntryOverHead + len(key.Bytes())
}
// nodes cache their bytes representation so the total memory consumed is roughly twice that
return cacheEntryOverHead + len(key.Bytes()) + 2*len(n.bytes())
return cacheEntryOverHead + len(key.Bytes()) + codec.encodedDBNodeSize(&n.dbNode)
}
22 changes: 3 additions & 19 deletions x/merkledb/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ type child struct {
type node struct {
dbNode
key Key
nodeBytes []byte
valueDigest maybe.Maybe[[]byte]
}

Expand All @@ -50,9 +49,8 @@ func parseNode(key Key, nodeBytes []byte) (*node, error) {
return nil, err
}
result := &node{
dbNode: n,
key: key,
nodeBytes: nodeBytes,
dbNode: n,
key: key,
}

result.setValueDigest()
Expand All @@ -66,17 +64,7 @@ func (n *node) hasValue() bool {

// Returns the byte representation of this node.
func (n *node) bytes() []byte {
if n.nodeBytes == nil {
n.nodeBytes = codec.encodeDBNode(&n.dbNode)
}

return n.nodeBytes
}

// clear the cached values that will need to be recalculated whenever the node changes
// for example, node ID and byte representation
func (n *node) onNodeChanged() {
n.nodeBytes = nil
return codec.encodeDBNode(&n.dbNode)
}

// Returns and caches the ID of this node.
Expand All @@ -88,7 +76,6 @@ func (n *node) calculateID(metrics merkleMetrics) ids.ID {

// Set [n]'s value to [val].
func (n *node) setValue(val maybe.Maybe[[]byte]) {
n.onNodeChanged()
n.value = val
n.setValueDigest()
}
Expand Down Expand Up @@ -121,13 +108,11 @@ func (n *node) addChildWithID(childNode *node, tokenSize int, childID ids.ID) {

// Adds a child to [n] without a reference to the child node.
func (n *node) setChildEntry(index byte, childEntry *child) {
n.onNodeChanged()
n.children[index] = childEntry
}

// Removes [child] from [n]'s children.
func (n *node) removeChild(child *node, tokenSize int) {
n.onNodeChanged()
delete(n.children, child.key.Token(n.key.length, tokenSize))
}

Expand All @@ -143,7 +128,6 @@ func (n *node) clone() *node {
children: make(map[byte]*child, len(n.children)),
},
valueDigest: n.valueDigest,
nodeBytes: n.nodeBytes,
}
for key, existing := range n.children {
result.children[key] = &child{
Expand Down
1 change: 0 additions & 1 deletion x/merkledb/trieview.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,6 @@ func (t *trieView) calculateNodeIDsHelper(n *node) ids.ID {
// This child wasn't changed.
continue
}
n.onNodeChanged()
childEntry.hasValue = childNodeChange.after.hasValue()

// Try updating the child and its descendants in a goroutine.
Expand Down

0 comments on commit 4909a20

Please sign in to comment.