Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cached node bytes from merkle nodes #2393

Merged
merged 37 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e562467
Shrink bytes stored in nodes
dboehm-avalabs Nov 16, 2023
6446181
Update db_test.go
dboehm-avalabs Nov 16, 2023
48d7a07
Update node.go
dboehm-avalabs Nov 16, 2023
ddace03
Update codec.go
dboehm-avalabs Nov 16, 2023
d66f8e7
reduce bytes
dboehm-avalabs Nov 16, 2023
2fa326e
Update trieview.go
dboehm-avalabs Nov 16, 2023
67e5f39
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 16, 2023
1e9e189
Update trieview.go
dboehm-avalabs Nov 16, 2023
603ca6c
Update db.go
dboehm-avalabs Nov 16, 2023
07bec1f
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 16, 2023
7908bb5
Merge branch 'dev' into ShrinkNodeStorage
Nov 22, 2023
b5a6dfa
nit
Nov 22, 2023
d6c0774
remove unused channel
Nov 22, 2023
4538d42
Update trieview.go
dboehm-avalabs Nov 22, 2023
f3461fa
Merge branch 'dev' into ShrinkNodeStorage
dboehm-avalabs Nov 27, 2023
39cc53b
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Nov 27, 2023
8462ead
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 27, 2023
a9ba3a1
Merge branch 'dev' into ShrinkNodeStorage
Nov 29, 2023
b640a5a
Merge branch 'ShrinkNodeStorage' into RemoveNodeBytes
dboehm-avalabs Nov 29, 2023
3b23a4c
Update db.go
dboehm-avalabs Nov 29, 2023
20df491
Update x/merkledb/codec.go
dboehm-avalabs Dec 12, 2023
2a3b29c
comments
dboehm-avalabs Dec 12, 2023
2a3f2a2
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 12, 2023
4382155
Merge branch 'RemoveNodeBytes' of https://github.com/ava-labs/avalanc…
dboehm-avalabs Dec 12, 2023
9050a16
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 12, 2023
278abec
Update codec.go
dboehm-avalabs Dec 12, 2023
2d14a85
Update codec.go
dboehm-avalabs Dec 12, 2023
66b4498
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 13, 2023
ed026bc
Update codec_test.go
dboehm-avalabs Dec 13, 2023
1c1ced9
Update codec.go
dboehm-avalabs Dec 13, 2023
fd34f89
Update codec.go
dboehm-avalabs Dec 13, 2023
1f3363e
Update codec.go
dboehm-avalabs Dec 13, 2023
83892ed
Update codec.go
dboehm-avalabs Dec 13, 2023
20c6427
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 14, 2023
de890c5
Merge branch 'dev' into RemoveNodeBytes
dboehm-avalabs Dec 14, 2023
40c9894
nits; add test
Dec 14, 2023
ebfba67
Merge branch 'RemoveNodeBytes' of github.com:ava-labs/avalanchego int…
Dec 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 38 additions & 13 deletions x/merkledb/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ const (
minDBNodeLen = minMaybeByteSliceLen + minVarIntLen
minChildLen = minVarIntLen + minKeyLen + ids.IDLen + boolLen

estimatedKeyLen = 64
estimatedValueLen = 64
estimatedCompressedKeyLen = 8
// Child index, child compressed key, child ID, child has value
estimatedNodeChildLen = minVarIntLen + estimatedCompressedKeyLen + ids.IDLen + boolLen
estimatedKeyLen = 64
estimatedValueLen = 64
// Child index, child ID
hashValuesChildLen = minVarIntLen + ids.IDLen
)
Expand Down Expand Up @@ -62,6 +59,7 @@ type encoderDecoder interface {
type encoder interface {
// Assumes [n] is non-nil.
encodeDBNode(n *dbNode) []byte
encodedDBNodeSize(n *dbNode) int

// Returns the bytes that will be hashed to generate [n]'s ID.
// Assumes [n] is non-nil.
Expand Down Expand Up @@ -91,16 +89,43 @@ type codecImpl struct {
varIntPool sync.Pool
}

func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
var (
numChildren = len(n.children)
// Estimate size of [n] to prevent memory allocations
estimatedLen = estimatedValueLen + minVarIntLen + estimatedNodeChildLen*numChildren
buf = bytes.NewBuffer(make([]byte, 0, estimatedLen))
)
// encodedDBNodeSize returns the number of bytes needed to encode [n].
// encodeDBNode uses this value as the capacity of its output buffer.
// Assumes [n] is non-nil.
func (c *codecImpl) encodedDBNodeSize(n *dbNode) int {
	// Fixed part: the varint holding the number of children plus the bool
	// that records whether the node has a value.
	size := boolLen + c.uintSize(uint64(len(n.children)))

	// Optional part: the value, stored as a varint length followed by the
	// value bytes themselves.
	if n.value.HasValue() {
		valueLen := len(n.value.Value())
		size += c.uintSize(uint64(valueLen)) + valueLen
	}

	// Per-child part: one entry for every element of n.children.
	// Entries are dereferenced by childSize, so they must be non-nil.
	for childIndex, childEntry := range n.children {
		size += c.childSize(childIndex, childEntry)
	}
	return size
}

// childSize returns the number of bytes contributed by a single child entry:
// the varint child index, the compressed key, the child ID and the bool
// recording whether the child has a value.
// Assumes [childEntry] is non-nil.
func (c *codecImpl) childSize(index byte, childEntry *child) int {
	indexLen := c.uintSize(uint64(index))
	compressedKeyLen := c.keySize(childEntry.compressedKey)
	return indexLen + compressedKeyLen + ids.IDLen + boolLen
}

// uintSize returns the number of bytes codecImpl.encodeUint emits for [value].
// It mirrors binary.PutUvarint, which writes 7 bits of payload per byte, so
// this must be kept in sync if encodeUint ever changes its encoding.
func (*codecImpl) uintSize(value uint64) int {
	// Every value needs at least one byte; each additional 7-bit group that
	// remains after shifting costs one more.
	size := 1
	for ; value >= 0x80; value >>= 7 {
		size++
	}
	return size
}

// keySize returns the number of bytes needed to encode the key [p]:
// a varint holding p.length followed by bytesNeeded(p.length) bytes.
// NOTE(review): presumably p.length is measured in bits and bytesNeeded
// rounds it up to whole bytes; confirm against the Key definition.
func (c *codecImpl) keySize(p Key) int {
	lengthPrefixLen := c.uintSize(uint64(p.length))
	keyBytesLen := bytesNeeded(p.length)
	return lengthPrefixLen + keyBytesLen
}

func (c *codecImpl) encodeDBNode(n *dbNode) []byte {
buf := bytes.NewBuffer(make([]byte, 0, c.encodedDBNodeSize(n)))
c.encodeMaybeByteSlice(buf, n.value)
c.encodeUint(buf, uint64(numChildren))
c.encodeUint(buf, uint64(len(n.children)))
// Note we insert children in order of increasing index
// for determinism.
keys := maps.Keys(n.children)
Expand Down
2 changes: 1 addition & 1 deletion x/merkledb/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func FuzzCodecDBNodeDeterministic(f *testing.F) {
}

nodeBytes := codec.encodeDBNode(&node)

require.Len(nodeBytes, codec.encodedDBNodeSize(&node))
var gotNode dbNode
require.NoError(codec.decodeDBNode(nodeBytes, &gotNode))
require.Equal(node, gotNode)
Expand Down
3 changes: 1 addition & 2 deletions x/merkledb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,5 @@ func cacheEntrySize(key Key, n *node) int {
if n == nil {
return len(key.Bytes())
}
// nodes cache their bytes representation so the total memory consumed is roughly twice that
return len(key.Bytes()) + 2*len(n.bytes())
return len(key.Bytes()) + codec.encodedDBNodeSize(&n.dbNode)
}
22 changes: 3 additions & 19 deletions x/merkledb/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ type child struct {
type node struct {
dbNode
key Key
nodeBytes []byte
valueDigest maybe.Maybe[[]byte]
}

Expand All @@ -50,9 +49,8 @@ func parseNode(key Key, nodeBytes []byte) (*node, error) {
return nil, err
}
result := &node{
dbNode: n,
key: key,
nodeBytes: nodeBytes,
dbNode: n,
key: key,
}

result.setValueDigest()
Expand All @@ -66,17 +64,7 @@ func (n *node) hasValue() bool {

// Returns the byte representation of this node.
func (n *node) bytes() []byte {
if n.nodeBytes == nil {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing this cached value means we'll have to reserialize the node before writing it. Are we concerned about the additional time that'll take? Seems to be a tradeoff between memory usage and CPU usage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anything we write is a node we have edited, so its cached bytes would always have been nil at that point anyway.

n.nodeBytes = codec.encodeDBNode(&n.dbNode)
}

return n.nodeBytes
}

// clear the cached values that will need to be recalculated whenever the node changes
// for example, node ID and byte representation
func (n *node) onNodeChanged() {
n.nodeBytes = nil
return codec.encodeDBNode(&n.dbNode)
}

// Returns and caches the ID of this node.
Expand All @@ -88,7 +76,6 @@ func (n *node) calculateID(metrics merkleMetrics) ids.ID {

// Set [n]'s value to [val].
func (n *node) setValue(val maybe.Maybe[[]byte]) {
n.onNodeChanged()
n.value = val
n.setValueDigest()
}
Expand Down Expand Up @@ -116,13 +103,11 @@ func (n *node) addChild(childNode *node, tokenSize int) {

// Adds a child to [n] without a reference to the child node.
func (n *node) setChildEntry(index byte, childEntry *child) {
n.onNodeChanged()
n.children[index] = childEntry
}

// Removes [child] from [n]'s children.
func (n *node) removeChild(child *node, tokenSize int) {
n.onNodeChanged()
delete(n.children, child.key.Token(n.key.length, tokenSize))
}

Expand All @@ -138,7 +123,6 @@ func (n *node) clone() *node {
children: make(map[byte]*child, len(n.children)),
},
valueDigest: n.valueDigest,
nodeBytes: n.nodeBytes,
}
for key, existing := range n.children {
result.children[key] = &child{
Expand Down
1 change: 0 additions & 1 deletion x/merkledb/trieview.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ func (t *trieView) calculateNodeIDsHelper(n *node) ids.ID {
// This child wasn't changed.
continue
}
n.onNodeChanged()
childEntry.hasValue = childNodeChange.after.hasValue()

// Try updating the child and its descendants in a goroutine.
Expand Down
Loading