From 188f2b2f596d7af941b69f34fa151618feae66c3 Mon Sep 17 00:00:00 2001 From: David Boehm <91908103+dboehm-avalabs@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:10:00 -0400 Subject: [PATCH] MerkleDB Reduce buffer creation/memcopy on path construction (#2124) Co-authored-by: Dan Laine --- x/merkledb/path.go | 111 +++++++++++++++++++++++----------------- x/merkledb/path_test.go | 84 ++++++++++++++++-------------- x/merkledb/proof.go | 2 +- x/merkledb/trie_test.go | 2 +- x/merkledb/trieview.go | 8 +-- 5 files changed, 116 insertions(+), 91 deletions(-) diff --git a/x/merkledb/path.go b/x/merkledb/path.go index 3c580117f586..20fbb599e3c2 100644 --- a/x/merkledb/path.go +++ b/x/merkledb/path.go @@ -128,6 +128,20 @@ func (p Path) HasPrefix(prefix Path) bool { return strings.HasPrefix(p.value, prefixWithoutPartialByte) } +// iteratedHasPrefix checks if the provided prefix path is a prefix of the current path after having skipped [skipTokens] tokens first +// this has better performance than constructing the actual path via Skip() then calling HasPrefix because it avoids the []byte allocation +func (p Path) iteratedHasPrefix(skipTokens int, prefix Path) bool { + if p.tokensLength-skipTokens < prefix.tokensLength { + return false + } + for i := 0; i < prefix.tokensLength; i++ { + if p.Token(skipTokens+i) != prefix.Token(i) { + return false + } + } + return true +} + // HasStrictPrefix returns true iff [prefix] is a prefix of [p] // but is not equal to it. func (p Path) HasStrictPrefix(prefix Path) bool { @@ -149,11 +163,7 @@ func (p Path) Token(index int) byte { // Path with [token] appended to the end. func (p Path) Append(token byte) Path { buffer := make([]byte, p.bytesNeeded(p.tokensLength+1)) - copy(buffer, p.value) - // Shift [token] to the left such that it's at the correct - // index within its storage byte, then OR it with its storage - // byte to write the token into the byte. - buffer[len(buffer)-1] |= token << p.bitsToShift(p.tokensLength) + p.appendIntoBuffer(buffer, token) return Path{ value: byteSliceToString(buffer), tokensLength: p.tokensLength + 1, @@ -216,49 +226,6 @@ func (p Path) bytesNeeded(tokens int) int { return size } -// Extend returns a new Path that equals the passed Path appended to the current Path -func (p Path) Extend(path Path) Path { - if p.tokensLength == 0 { - return path - } - if path.tokensLength == 0 { - return p - } - - totalLength := p.tokensLength + path.tokensLength - - // copy existing value into the buffer - buffer := make([]byte, p.bytesNeeded(totalLength)) - copy(buffer, p.value) - - // If the existing value fits into a whole number of bytes, - // the extension path can be copied directly into the buffer. - if !p.hasPartialByte() { - copy(buffer[len(p.value):], path.value) - return Path{ - value: byteSliceToString(buffer), - tokensLength: totalLength, - pathConfig: p.pathConfig, - } - } - - // The existing path doesn't fit into a whole number of bytes. - // Figure out how many bits to shift. - shift := p.bitsToShift(p.tokensLength - 1) - // Fill the partial byte with the first [shift] bits of the extension path - buffer[len(p.value)-1] |= path.value[0] >> (8 - shift) - - // copy the rest of the extension path bytes into the buffer, - // shifted byte shift bits - shiftCopy(buffer[len(p.value):], path.value, shift) - - return Path{ - value: byteSliceToString(buffer), - tokensLength: totalLength, - pathConfig: p.pathConfig, - } -} - // Treats [src] as a bit array and copies it into [dst] shifted by [shift] bits. // For example, if [src] is [0b0000_0001, 0b0000_0010] and [shift] is 4, // we copy [0b0001_0000, 0b0010_0000] into [dst]. @@ -306,6 +273,54 @@ func (p Path) Skip(tokensToSkip int) Path { return result } +func (p Path) AppendExtend(token byte, extensionPath Path) Path { + appendBytes := p.bytesNeeded(p.tokensLength + 1) + totalLength := p.tokensLength + 1 + extensionPath.tokensLength + buffer := make([]byte, p.bytesNeeded(totalLength)) + p.appendIntoBuffer(buffer[:appendBytes], token) + + // the extension path will be shifted based on the number of tokens in the partial byte + tokenRemainder := (p.tokensLength + 1) % p.tokensPerByte + result := Path{ + value: byteSliceToString(buffer), + tokensLength: totalLength, + pathConfig: p.pathConfig, + } + + extensionBuffer := buffer[appendBytes-1:] + if extensionPath.tokensLength == 0 { + return result + } + + // If the existing value fits into a whole number of bytes, + // the extension path can be copied directly into the buffer. + if tokenRemainder == 0 { + copy(extensionBuffer[1:], extensionPath.value) + return result + } + + // The existing path doesn't fit into a whole number of bytes. + // Figure out how many bits to shift. + shift := extensionPath.bitsToShift(tokenRemainder - 1) + // Fill the partial byte with the first [shift] bits of the extension path + extensionBuffer[0] |= extensionPath.value[0] >> (8 - shift) + + // copy the rest of the extension path bytes into the buffer, + // shifted byte shift bits + shiftCopy(extensionBuffer[1:], extensionPath.value, shift) + + return result +} + +func (p Path) appendIntoBuffer(buffer []byte, token byte) { + copy(buffer, p.value) + + // Shift [token] to the left such that it's at the correct + // index within its storage byte, then OR it with its storage + // byte to write the token into the byte. + buffer[len(buffer)-1] |= token << p.bitsToShift(p.tokensLength) +} + // Take returns a new Path that contains the first tokensToTake tokens of the current Path func (p Path) Take(tokensToTake int) Path { if p.tokensLength == tokensToTake { diff --git a/x/merkledb/path_test.go b/x/merkledb/path_test.go index effabdcee7be..a03de5c2f94a 100644 --- a/x/merkledb/path_test.go +++ b/x/merkledb/path_test.go @@ -114,6 +114,7 @@ func Test_Path_Has_Prefix(t *testing.T) { pathB := tt.pathB(bf) require.Equal(tt.isPrefix, pathA.HasPrefix(pathB)) + require.Equal(tt.isPrefix, pathA.iteratedHasPrefix(0, pathB)) require.Equal(tt.isStrictPrefix, pathA.HasStrictPrefix(pathB)) }) } @@ -275,70 +276,76 @@ func Test_Path_Append(t *testing.T) { } } -func Test_Path_Extend(t *testing.T) { +func Test_Path_AppendExtend(t *testing.T) { require := require.New(t) path2 := NewPath([]byte{0b1000_0000}, BranchFactor2).Take(1) p := NewPath([]byte{0b01010101}, BranchFactor2) - extendedP := path2.Extend(p) - require.Equal([]byte{0b10101010, 0b1000_0000}, extendedP.Bytes()) + extendedP := path2.AppendExtend(0, p) + require.Equal([]byte{0b10010101, 0b01000_000}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) require.Equal(byte(0), extendedP.Token(1)) - require.Equal(byte(1), extendedP.Token(2)) - require.Equal(byte(0), extendedP.Token(3)) - require.Equal(byte(1), extendedP.Token(4)) - require.Equal(byte(0), extendedP.Token(5)) - require.Equal(byte(1), extendedP.Token(6)) - require.Equal(byte(0), extendedP.Token(7)) - require.Equal(byte(1), extendedP.Token(8)) - - p = NewPath([]byte{0b01010101, 0b1000_0000}, BranchFactor2).Take(9) - extendedP = path2.Extend(p) - require.Equal([]byte{0b10101010, 0b1100_0000}, extendedP.Bytes()) + require.Equal(byte(0), extendedP.Token(2)) + require.Equal(byte(1), extendedP.Token(3)) + require.Equal(byte(0), extendedP.Token(4)) + require.Equal(byte(1), extendedP.Token(5)) + require.Equal(byte(0), extendedP.Token(6)) + require.Equal(byte(1), extendedP.Token(7)) + require.Equal(byte(0), extendedP.Token(8)) + require.Equal(byte(1), extendedP.Token(9)) + + p = NewPath([]byte{0b0101_0101, 0b1000_0000}, BranchFactor2).Take(9) + extendedP = path2.AppendExtend(0, p) + require.Equal([]byte{0b1001_0101, 0b0110_0000}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) require.Equal(byte(0), extendedP.Token(1)) - require.Equal(byte(1), extendedP.Token(2)) - require.Equal(byte(0), extendedP.Token(3)) - require.Equal(byte(1), extendedP.Token(4)) - require.Equal(byte(0), extendedP.Token(5)) - require.Equal(byte(1), extendedP.Token(6)) - require.Equal(byte(0), extendedP.Token(7)) - require.Equal(byte(1), extendedP.Token(8)) + require.Equal(byte(0), extendedP.Token(2)) + require.Equal(byte(1), extendedP.Token(3)) + require.Equal(byte(0), extendedP.Token(4)) + require.Equal(byte(1), extendedP.Token(5)) + require.Equal(byte(0), extendedP.Token(6)) + require.Equal(byte(1), extendedP.Token(7)) + require.Equal(byte(0), extendedP.Token(8)) require.Equal(byte(1), extendedP.Token(9)) + require.Equal(byte(1), extendedP.Token(10)) path4 := NewPath([]byte{0b0100_0000}, BranchFactor4).Take(1) p = NewPath([]byte{0b0101_0101}, BranchFactor4) - extendedP = path4.Extend(p) - require.Equal([]byte{0b0101_0101, 0b0100_0000}, extendedP.Bytes()) + extendedP = path4.AppendExtend(0, p) + require.Equal([]byte{0b0100_0101, 0b0101_0000}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) - require.Equal(byte(1), extendedP.Token(1)) + require.Equal(byte(0), extendedP.Token(1)) require.Equal(byte(1), extendedP.Token(2)) require.Equal(byte(1), extendedP.Token(3)) require.Equal(byte(1), extendedP.Token(4)) + require.Equal(byte(1), extendedP.Token(5)) path16 := NewPath([]byte{0b0001_0000}, BranchFactor16).Take(1) p = NewPath([]byte{0b0001_0001}, BranchFactor16) - extendedP = path16.Extend(p) - require.Equal([]byte{0b0001_0001, 0b0001_0000}, extendedP.Bytes()) + extendedP = path16.AppendExtend(0, p) + require.Equal([]byte{0b0001_0000, 0b0001_0001}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) - require.Equal(byte(1), extendedP.Token(1)) + require.Equal(byte(0), extendedP.Token(1)) require.Equal(byte(1), extendedP.Token(2)) + require.Equal(byte(1), extendedP.Token(3)) p = NewPath([]byte{0b0001_0001, 0b0001_0001}, BranchFactor16) - extendedP = path16.Extend(p) - require.Equal([]byte{0b0001_0001, 0b0001_0001, 0b0001_0000}, extendedP.Bytes()) + extendedP = path16.AppendExtend(0, p) + require.Equal([]byte{0b0001_0000, 0b0001_0001, 0b0001_0001}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) - require.Equal(byte(1), extendedP.Token(1)) + require.Equal(byte(0), extendedP.Token(1)) require.Equal(byte(1), extendedP.Token(2)) require.Equal(byte(1), extendedP.Token(3)) require.Equal(byte(1), extendedP.Token(4)) + require.Equal(byte(1), extendedP.Token(5)) path256 := NewPath([]byte{0b0000_0001}, BranchFactor256) p = NewPath([]byte{0b0000_0001}, BranchFactor256) - extendedP = path256.Extend(p) - require.Equal([]byte{0b0000_0001, 0b0000_0001}, extendedP.Bytes()) + extendedP = path256.AppendExtend(0, p) + require.Equal([]byte{0b0000_0001, 0b0000_0000, 0b0000_0001}, extendedP.Bytes()) require.Equal(byte(1), extendedP.Token(0)) - require.Equal(byte(1), extendedP.Token(1)) + require.Equal(byte(0), extendedP.Token(1)) + require.Equal(byte(1), extendedP.Token(2)) } func TestPathBytesNeeded(t *testing.T) { @@ -458,11 +465,12 @@ func TestPathBytesNeeded(t *testing.T) { } } -func FuzzPathExtend(f *testing.F) { +func FuzzPathAppendExtend(f *testing.F) { f.Fuzz(func( t *testing.T, first []byte, second []byte, + token byte, forceFirstOdd bool, forceSecondOdd bool, ) { @@ -476,13 +484,15 @@ func FuzzPathExtend(f *testing.F) { if forceSecondOdd && path2.tokensLength > 0 { path2 = path2.Take(path2.tokensLength - 1) } - extendedP := path1.Extend(path2) - require.Equal(path1.tokensLength+path2.tokensLength, extendedP.tokensLength) + token = byte(int(token) % int(branchFactor)) + extendedP := path1.AppendExtend(token, path2) + require.Equal(path1.tokensLength+path2.tokensLength+1, extendedP.tokensLength) for i := 0; i < path1.tokensLength; i++ { require.Equal(path1.Token(i), extendedP.Token(i)) } + require.Equal(token, extendedP.Token(path1.tokensLength)) for i := 0; i < path2.tokensLength; i++ { - require.Equal(path2.Token(i), extendedP.Token(i+path1.tokensLength)) + require.Equal(path2.Token(i), extendedP.Token(i+1+path1.tokensLength)) } } }) diff --git a/x/merkledb/proof.go b/x/merkledb/proof.go index e7954bfac247..23cc7a3532ec 100644 --- a/x/merkledb/proof.go +++ b/x/merkledb/proof.go @@ -862,7 +862,7 @@ func addPathInfo( if existingChild, ok := n.children[index]; ok { compressedPath = existingChild.compressedPath } - childPath := keyPath.Append(index).Extend(compressedPath) + childPath := keyPath.AppendExtend(index, compressedPath) if (shouldInsertLeftChildren && childPath.Less(insertChildrenLessThan.Value())) || (shouldInsertRightChildren && childPath.Greater(insertChildrenGreaterThan.Value())) { // We didn't set the other values on the child entry, but it doesn't matter. diff --git a/x/merkledb/trie_test.go b/x/merkledb/trie_test.go index 4c1878bc47b5..d13680b0b064 100644 --- a/x/merkledb/trie_test.go +++ b/x/merkledb/trie_test.go @@ -1201,7 +1201,7 @@ func Test_Trie_ConcurrentNewViewAndCommit(t *testing.T) { // Assumes this node has exactly one child. func getSingleChildPath(n *node) Path { for index, entry := range n.children { - return n.key.Append(index).Extend(entry.compressedPath) + return n.key.AppendExtend(index, entry.compressedPath) } return Path{} } diff --git a/x/merkledb/trieview.go b/x/merkledb/trieview.go index e216755a7f71..a92970866e14 100644 --- a/x/merkledb/trieview.go +++ b/x/merkledb/trieview.go @@ -269,7 +269,7 @@ func (t *trieView) calculateNodeIDsHelper(n *node) { ) for childIndex, child := range n.children { - childPath := n.key.Append(childIndex).Extend(child.compressedPath) + childPath := n.key.AppendExtend(childIndex, child.compressedPath) childNodeChange, ok := t.changes.nodes[childPath] if !ok { // This child wasn't changed. @@ -367,7 +367,7 @@ func (t *trieView) getProof(ctx context.Context, key []byte) (*Proof, error) { childNode, err := t.getNodeWithID( child.id, - closestNode.key.Append(nextIndex).Extend(child.compressedPath), + closestNode.key.AppendExtend(nextIndex, child.compressedPath), child.hasValue, ) if err != nil { @@ -694,7 +694,7 @@ func (t *trieView) compressNodePath(parent, node *node) error { // "Cycle" over the key/values to find the only child. // Note this iteration once because len(node.children) == 1. for index, entry := range node.children { - childPath = node.key.Append(index).Extend(entry.compressedPath) + childPath = node.key.AppendExtend(index, entry.compressedPath) childEntry = entry } @@ -768,7 +768,7 @@ func (t *trieView) getPathTo(key Path) ([]*node, error) { // the current token for the child entry has now been handled, so increment the matchedPathIndex matchedPathIndex += 1 - if !hasChild || !key.Skip(matchedPathIndex).HasPrefix(nextChildEntry.compressedPath) { + if !hasChild || !key.iteratedHasPrefix(matchedPathIndex, nextChildEntry.compressedPath) { // there was no child along the path or the child that was there doesn't match the remaining path return nodes, nil }