Skip to content

Commit

Permalink
MerkleDB Reduce buffer creation/memcopy on path construction (#2124)
Browse files Browse the repository at this point in the history
Co-authored-by: Dan Laine <[email protected]>
  • Loading branch information
dboehm-avalabs and Dan Laine authored Oct 16, 2023
1 parent 9d44ec2 commit 188f2b2
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 91 deletions.
111 changes: 63 additions & 48 deletions x/merkledb/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@ func (p Path) HasPrefix(prefix Path) bool {
return strings.HasPrefix(p.value, prefixWithoutPartialByte)
}

// iteratedHasPrefix checks if the provided prefix path is a prefix of the current path after having skipped [skipTokens] tokens first
// this has better performance than constructing the actual path via Skip() then calling HasPrefix because it avoids the []byte allocation
func (p Path) iteratedHasPrefix(skipTokens int, prefix Path) bool {
if p.tokensLength-skipTokens < prefix.tokensLength {
return false
}
for i := 0; i < prefix.tokensLength; i++ {
if p.Token(skipTokens+i) != prefix.Token(i) {
return false
}
}
return true
}

// HasStrictPrefix returns true iff [prefix] is a prefix of [p]
// but is not equal to it.
func (p Path) HasStrictPrefix(prefix Path) bool {
Expand All @@ -149,11 +163,7 @@ func (p Path) Token(index int) byte {
// Path with [token] appended to the end.
func (p Path) Append(token byte) Path {
buffer := make([]byte, p.bytesNeeded(p.tokensLength+1))
copy(buffer, p.value)
// Shift [token] to the left such that it's at the correct
// index within its storage byte, then OR it with its storage
// byte to write the token into the byte.
buffer[len(buffer)-1] |= token << p.bitsToShift(p.tokensLength)
p.appendIntoBuffer(buffer, token)
return Path{
value: byteSliceToString(buffer),
tokensLength: p.tokensLength + 1,
Expand Down Expand Up @@ -216,49 +226,6 @@ func (p Path) bytesNeeded(tokens int) int {
return size
}

// Extend returns a new Path that equals the passed Path appended to the current Path
func (p Path) Extend(path Path) Path {
if p.tokensLength == 0 {
return path
}
if path.tokensLength == 0 {
return p
}

totalLength := p.tokensLength + path.tokensLength

// copy existing value into the buffer
buffer := make([]byte, p.bytesNeeded(totalLength))
copy(buffer, p.value)

// If the existing value fits into a whole number of bytes,
// the extension path can be copied directly into the buffer.
if !p.hasPartialByte() {
copy(buffer[len(p.value):], path.value)
return Path{
value: byteSliceToString(buffer),
tokensLength: totalLength,
pathConfig: p.pathConfig,
}
}

// The existing path doesn't fit into a whole number of bytes.
// Figure out how many bits to shift.
shift := p.bitsToShift(p.tokensLength - 1)
// Fill the partial byte with the first [shift] bits of the extension path
buffer[len(p.value)-1] |= path.value[0] >> (8 - shift)

// copy the rest of the extension path bytes into the buffer,
// shifted byte shift bits
shiftCopy(buffer[len(p.value):], path.value, shift)

return Path{
value: byteSliceToString(buffer),
tokensLength: totalLength,
pathConfig: p.pathConfig,
}
}

// Treats [src] as a bit array and copies it into [dst] shifted by [shift] bits.
// For example, if [src] is [0b0000_0001, 0b0000_0010] and [shift] is 4,
// we copy [0b0001_0000, 0b0010_0000] into [dst].
Expand Down Expand Up @@ -306,6 +273,54 @@ func (p Path) Skip(tokensToSkip int) Path {
return result
}

func (p Path) AppendExtend(token byte, extensionPath Path) Path {
appendBytes := p.bytesNeeded(p.tokensLength + 1)
totalLength := p.tokensLength + 1 + extensionPath.tokensLength
buffer := make([]byte, p.bytesNeeded(totalLength))
p.appendIntoBuffer(buffer[:appendBytes], token)

// the extension path will be shifted based on the number of tokens in the partial byte
tokenRemainder := (p.tokensLength + 1) % p.tokensPerByte
result := Path{
value: byteSliceToString(buffer),
tokensLength: totalLength,
pathConfig: p.pathConfig,
}

extensionBuffer := buffer[appendBytes-1:]
if extensionPath.tokensLength == 0 {
return result
}

// If the existing value fits into a whole number of bytes,
// the extension path can be copied directly into the buffer.
if tokenRemainder == 0 {
copy(extensionBuffer[1:], extensionPath.value)
return result
}

// The existing path doesn't fit into a whole number of bytes.
// Figure out how many bits to shift.
shift := extensionPath.bitsToShift(tokenRemainder - 1)
// Fill the partial byte with the first [shift] bits of the extension path
extensionBuffer[0] |= extensionPath.value[0] >> (8 - shift)

// copy the rest of the extension path bytes into the buffer,
// shifted byte shift bits
shiftCopy(extensionBuffer[1:], extensionPath.value, shift)

return result
}

func (p Path) appendIntoBuffer(buffer []byte, token byte) {
copy(buffer, p.value)

// Shift [token] to the left such that it's at the correct
// index within its storage byte, then OR it with its storage
// byte to write the token into the byte.
buffer[len(buffer)-1] |= token << p.bitsToShift(p.tokensLength)
}

// Take returns a new Path that contains the first tokensToTake tokens of the current Path
func (p Path) Take(tokensToTake int) Path {
if p.tokensLength == tokensToTake {
Expand Down
84 changes: 47 additions & 37 deletions x/merkledb/path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ func Test_Path_Has_Prefix(t *testing.T) {
pathB := tt.pathB(bf)

require.Equal(tt.isPrefix, pathA.HasPrefix(pathB))
require.Equal(tt.isPrefix, pathA.iteratedHasPrefix(0, pathB))
require.Equal(tt.isStrictPrefix, pathA.HasStrictPrefix(pathB))
})
}
Expand Down Expand Up @@ -275,70 +276,76 @@ func Test_Path_Append(t *testing.T) {
}
}

func Test_Path_Extend(t *testing.T) {
func Test_Path_AppendExtend(t *testing.T) {
require := require.New(t)

path2 := NewPath([]byte{0b1000_0000}, BranchFactor2).Take(1)
p := NewPath([]byte{0b01010101}, BranchFactor2)
extendedP := path2.Extend(p)
require.Equal([]byte{0b10101010, 0b1000_0000}, extendedP.Bytes())
extendedP := path2.AppendExtend(0, p)
require.Equal([]byte{0b10010101, 0b01000_000}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
require.Equal(byte(0), extendedP.Token(3))
require.Equal(byte(1), extendedP.Token(4))
require.Equal(byte(0), extendedP.Token(5))
require.Equal(byte(1), extendedP.Token(6))
require.Equal(byte(0), extendedP.Token(7))
require.Equal(byte(1), extendedP.Token(8))

p = NewPath([]byte{0b01010101, 0b1000_0000}, BranchFactor2).Take(9)
extendedP = path2.Extend(p)
require.Equal([]byte{0b10101010, 0b1100_0000}, extendedP.Bytes())
require.Equal(byte(0), extendedP.Token(2))
require.Equal(byte(1), extendedP.Token(3))
require.Equal(byte(0), extendedP.Token(4))
require.Equal(byte(1), extendedP.Token(5))
require.Equal(byte(0), extendedP.Token(6))
require.Equal(byte(1), extendedP.Token(7))
require.Equal(byte(0), extendedP.Token(8))
require.Equal(byte(1), extendedP.Token(9))

p = NewPath([]byte{0b0101_0101, 0b1000_0000}, BranchFactor2).Take(9)
extendedP = path2.AppendExtend(0, p)
require.Equal([]byte{0b1001_0101, 0b0110_0000}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
require.Equal(byte(0), extendedP.Token(3))
require.Equal(byte(1), extendedP.Token(4))
require.Equal(byte(0), extendedP.Token(5))
require.Equal(byte(1), extendedP.Token(6))
require.Equal(byte(0), extendedP.Token(7))
require.Equal(byte(1), extendedP.Token(8))
require.Equal(byte(0), extendedP.Token(2))
require.Equal(byte(1), extendedP.Token(3))
require.Equal(byte(0), extendedP.Token(4))
require.Equal(byte(1), extendedP.Token(5))
require.Equal(byte(0), extendedP.Token(6))
require.Equal(byte(1), extendedP.Token(7))
require.Equal(byte(0), extendedP.Token(8))
require.Equal(byte(1), extendedP.Token(9))
require.Equal(byte(1), extendedP.Token(10))

path4 := NewPath([]byte{0b0100_0000}, BranchFactor4).Take(1)
p = NewPath([]byte{0b0101_0101}, BranchFactor4)
extendedP = path4.Extend(p)
require.Equal([]byte{0b0101_0101, 0b0100_0000}, extendedP.Bytes())
extendedP = path4.AppendExtend(0, p)
require.Equal([]byte{0b0100_0101, 0b0101_0000}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(1), extendedP.Token(1))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
require.Equal(byte(1), extendedP.Token(3))
require.Equal(byte(1), extendedP.Token(4))
require.Equal(byte(1), extendedP.Token(5))

path16 := NewPath([]byte{0b0001_0000}, BranchFactor16).Take(1)
p = NewPath([]byte{0b0001_0001}, BranchFactor16)
extendedP = path16.Extend(p)
require.Equal([]byte{0b0001_0001, 0b0001_0000}, extendedP.Bytes())
extendedP = path16.AppendExtend(0, p)
require.Equal([]byte{0b0001_0000, 0b0001_0001}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(1), extendedP.Token(1))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
require.Equal(byte(1), extendedP.Token(3))

p = NewPath([]byte{0b0001_0001, 0b0001_0001}, BranchFactor16)
extendedP = path16.Extend(p)
require.Equal([]byte{0b0001_0001, 0b0001_0001, 0b0001_0000}, extendedP.Bytes())
extendedP = path16.AppendExtend(0, p)
require.Equal([]byte{0b0001_0000, 0b0001_0001, 0b0001_0001}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(1), extendedP.Token(1))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
require.Equal(byte(1), extendedP.Token(3))
require.Equal(byte(1), extendedP.Token(4))
require.Equal(byte(1), extendedP.Token(5))

path256 := NewPath([]byte{0b0000_0001}, BranchFactor256)
p = NewPath([]byte{0b0000_0001}, BranchFactor256)
extendedP = path256.Extend(p)
require.Equal([]byte{0b0000_0001, 0b0000_0001}, extendedP.Bytes())
extendedP = path256.AppendExtend(0, p)
require.Equal([]byte{0b0000_0001, 0b0000_0000, 0b0000_0001}, extendedP.Bytes())
require.Equal(byte(1), extendedP.Token(0))
require.Equal(byte(1), extendedP.Token(1))
require.Equal(byte(0), extendedP.Token(1))
require.Equal(byte(1), extendedP.Token(2))
}

func TestPathBytesNeeded(t *testing.T) {
Expand Down Expand Up @@ -458,11 +465,12 @@ func TestPathBytesNeeded(t *testing.T) {
}
}

func FuzzPathExtend(f *testing.F) {
func FuzzPathAppendExtend(f *testing.F) {
f.Fuzz(func(
t *testing.T,
first []byte,
second []byte,
token byte,
forceFirstOdd bool,
forceSecondOdd bool,
) {
Expand All @@ -476,13 +484,15 @@ func FuzzPathExtend(f *testing.F) {
if forceSecondOdd && path2.tokensLength > 0 {
path2 = path2.Take(path2.tokensLength - 1)
}
extendedP := path1.Extend(path2)
require.Equal(path1.tokensLength+path2.tokensLength, extendedP.tokensLength)
token = byte(int(token) % int(branchFactor))
extendedP := path1.AppendExtend(token, path2)
require.Equal(path1.tokensLength+path2.tokensLength+1, extendedP.tokensLength)
for i := 0; i < path1.tokensLength; i++ {
require.Equal(path1.Token(i), extendedP.Token(i))
}
require.Equal(token, extendedP.Token(path1.tokensLength))
for i := 0; i < path2.tokensLength; i++ {
require.Equal(path2.Token(i), extendedP.Token(i+path1.tokensLength))
require.Equal(path2.Token(i), extendedP.Token(i+1+path1.tokensLength))
}
}
})
Expand Down
2 changes: 1 addition & 1 deletion x/merkledb/proof.go
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ func addPathInfo(
if existingChild, ok := n.children[index]; ok {
compressedPath = existingChild.compressedPath
}
childPath := keyPath.Append(index).Extend(compressedPath)
childPath := keyPath.AppendExtend(index, compressedPath)
if (shouldInsertLeftChildren && childPath.Less(insertChildrenLessThan.Value())) ||
(shouldInsertRightChildren && childPath.Greater(insertChildrenGreaterThan.Value())) {
// We didn't set the other values on the child entry, but it doesn't matter.
Expand Down
2 changes: 1 addition & 1 deletion x/merkledb/trie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,7 @@ func Test_Trie_ConcurrentNewViewAndCommit(t *testing.T) {
// Assumes this node has exactly one child.
func getSingleChildPath(n *node) Path {
for index, entry := range n.children {
return n.key.Append(index).Extend(entry.compressedPath)
return n.key.AppendExtend(index, entry.compressedPath)
}
return Path{}
}
Expand Down
8 changes: 4 additions & 4 deletions x/merkledb/trieview.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ func (t *trieView) calculateNodeIDsHelper(n *node) {
)

for childIndex, child := range n.children {
childPath := n.key.Append(childIndex).Extend(child.compressedPath)
childPath := n.key.AppendExtend(childIndex, child.compressedPath)
childNodeChange, ok := t.changes.nodes[childPath]
if !ok {
// This child wasn't changed.
Expand Down Expand Up @@ -367,7 +367,7 @@ func (t *trieView) getProof(ctx context.Context, key []byte) (*Proof, error) {

childNode, err := t.getNodeWithID(
child.id,
closestNode.key.Append(nextIndex).Extend(child.compressedPath),
closestNode.key.AppendExtend(nextIndex, child.compressedPath),
child.hasValue,
)
if err != nil {
Expand Down Expand Up @@ -694,7 +694,7 @@ func (t *trieView) compressNodePath(parent, node *node) error {
// "Cycle" over the key/values to find the only child.
// Note this iteration once because len(node.children) == 1.
for index, entry := range node.children {
childPath = node.key.Append(index).Extend(entry.compressedPath)
childPath = node.key.AppendExtend(index, entry.compressedPath)
childEntry = entry
}

Expand Down Expand Up @@ -768,7 +768,7 @@ func (t *trieView) getPathTo(key Path) ([]*node, error) {
// the current token for the child entry has now been handled, so increment the matchedPathIndex
matchedPathIndex += 1

if !hasChild || !key.Skip(matchedPathIndex).HasPrefix(nextChildEntry.compressedPath) {
if !hasChild || !key.iteratedHasPrefix(matchedPathIndex, nextChildEntry.compressedPath) {
// there was no child along the path or the child that was there doesn't match the remaining path
return nodes, nil
}
Expand Down

0 comments on commit 188f2b2

Please sign in to comment.