trie: reduce allocations in stacktrie (#30743)
This PR uses various tweaks and tricks to make the stacktrie nearly allocation-free.

```
[user@work go-ethereum]$ benchstat stacktrie.1 stacktrie.7
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/trie
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
             │ stacktrie.1  │             stacktrie.7              │
             │    sec/op    │    sec/op     vs base                │
Insert100K-8   106.97m ± 8%   88.21m ± 34%  -17.54% (p=0.000 n=10)

             │   stacktrie.1    │             stacktrie.7              │
             │       B/op       │     B/op      vs base                │
Insert100K-8   13199.608Ki ± 0%   3.424Ki ± 3%  -99.97% (p=0.000 n=10)

             │  stacktrie.1   │             stacktrie.7             │
             │   allocs/op    │ allocs/op   vs base                 │
Insert100K-8   553428.50 ± 0%   22.00 ± 5%  -100.00% (p=0.000 n=10)
```
Also improves derivesha:
```
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/core/types
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
                          │ derivesha.1 │             derivesha.2              │
                          │   sec/op    │    sec/op     vs base                │
DeriveSha200/stack_trie-8   477.8µ ± 2%   430.0µ ± 12%  -10.00% (p=0.000 n=10)

                          │ derivesha.1  │             derivesha.2              │
                          │     B/op     │     B/op      vs base                │
DeriveSha200/stack_trie-8   45.17Ki ± 0%   25.65Ki ± 0%  -43.21% (p=0.000 n=10)

                          │ derivesha.1 │            derivesha.2             │
                          │  allocs/op  │ allocs/op   vs base                │
DeriveSha200/stack_trie-8   1259.0 ± 0%   232.0 ± 0%  -81.57% (p=0.000 n=10)

```

---------

Co-authored-by: Gary Rong <[email protected]>
holiman and rjl493456442 authored Jan 23, 2025
1 parent a840e9b commit d3cc618
Showing 7 changed files with 238 additions and 39 deletions.
64 changes: 64 additions & 0 deletions trie/bytepool.go
@@ -0,0 +1,64 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

// bytesPool is a pool for byte slices. It is safe for concurrent use.
type bytesPool struct {
	c chan []byte
	w int
}

// newBytesPool creates a new bytesPool. The sliceCap sets the capacity of
// newly allocated slices, and the nitems determines how many items the pool
// will hold, at maximum.
func newBytesPool(sliceCap, nitems int) *bytesPool {
	return &bytesPool{
		c: make(chan []byte, nitems),
		w: sliceCap,
	}
}

// Get returns a slice. Safe for concurrent use.
func (bp *bytesPool) Get() []byte {
	select {
	case b := <-bp.c:
		return b
	default:
		return make([]byte, 0, bp.w)
	}
}

// GetWithSize returns a slice with specified byte slice size.
func (bp *bytesPool) GetWithSize(s int) []byte {
	b := bp.Get()
	if cap(b) < s {
		return make([]byte, s)
	}
	return b[:s]
}

// Put returns a slice to the pool. Safe for concurrent use. This method
// will ignore slices that are too small or too large (>3x the cap)
func (bp *bytesPool) Put(b []byte) {
	if c := cap(b); c < bp.w || c > 3*bp.w {
		return
	}
	select {
	case bp.c <- b:
	default:
	}
}
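A hedged usage sketch of the pool above (the helper and its hash parameter are illustrative, not part of this PR); since bytesPool is unexported, it would live alongside bytepool.go in package trie:

```
package trie

// examplePoolUsage is a hypothetical illustration of the pool above: slices
// are handed out with a fixed capacity and silently dropped on Put when they
// are the wrong size or the pool is full.
func examplePoolUsage(hash [32]byte) {
	pool := newBytesPool(32, 100) // 32-byte-capacity slices, at most 100 pooled

	buf := pool.GetWithSize(32) // reuses a pooled slice, or allocates a fresh one
	copy(buf, hash[:])          // use it, e.g. as a node hash buffer
	pool.Put(buf)               // return it; too-small/too-large slices are ignored
}
```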
12 changes: 12 additions & 0 deletions trie/encoding.go
@@ -104,6 +104,18 @@ func keybytesToHex(str []byte) []byte {
	return nibbles
}

// writeHexKey writes the hexkey into the given slice.
// OBS! This method omits the termination flag.
// OBS! The dst slice must be at least 2x as large as the key
func writeHexKey(dst []byte, key []byte) []byte {
	_ = dst[2*len(key)-1]
	for i, b := range key {
		dst[i*2] = b / 16
		dst[i*2+1] = b % 16
	}
	return dst[:2*len(key)]
}

// hexToKeybytes turns hex nibbles into key bytes.
// This can only be used for keys of even length.
func hexToKeybytes(hex []byte) []byte {
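A hedged illustration of writeHexKey's nibble layout (the key below is made up): each input byte expands into two nibbles, and unlike keybytesToHex no terminator nibble (16) is appended:

```
package trie

import "fmt"

// exampleWriteHexKey is a hypothetical demonstration of writeHexKey: each key
// byte expands into two nibbles, with no trailing terminator.
func exampleWriteHexKey() {
	key := []byte{0xab, 0xcd}          // hypothetical key
	dst := make([]byte, 2*len(key))    // must be at least 2x the key length
	fmt.Println(writeHexKey(dst, key)) // prints [10 11 12 13]
}
```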
8 changes: 8 additions & 0 deletions trie/hasher.go
@@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode {
	return n
}

// hashDataTo hashes the provided data to the given destination buffer. The caller
// must ensure that the dst buffer is of appropriate size.
func (h *hasher) hashDataTo(dst, data []byte) {
	h.sha.Reset()
	h.sha.Write(data)
	h.sha.Read(dst)
}

// proofHash is used to construct trie proofs, and returns the 'collapsed'
// node (for later RLP encoding) as well as the hashed node -- unless the
// node is smaller than 32 bytes, in which case it will be returned as is.
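For context, the same read-into-buffer pattern can be sketched standalone, assuming golang.org/x/crypto/sha3, whose legacy Keccak-256 state also implements io.Reader; reading the sponge into a caller-provided buffer avoids the fresh slice that Sum(nil) would return:

```
package main

import (
	"fmt"
	"io"

	"golang.org/x/crypto/sha3"
)

// keccakState is the subset of the sha3 state used here; the concrete type
// returned by NewLegacyKeccak256 implements hash.Hash and io.Reader.
type keccakState interface {
	io.Writer
	io.Reader
	Reset()
}

func main() {
	sha := sha3.NewLegacyKeccak256().(keccakState)
	dst := make([]byte, 32) // reusable destination buffer

	sha.Reset()
	sha.Write([]byte("some node rlp")) // hypothetical input
	sha.Read(dst)                      // hash written in place; no new slice allocated
	fmt.Printf("%x\n", dst)
}
```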
35 changes: 22 additions & 13 deletions trie/node.go
@@ -45,6 +45,27 @@ type (
	}
	hashNode  []byte
	valueNode []byte

	// fullnodeEncoder is a type used exclusively for encoding fullNode.
	// Briefly instantiating a fullnodeEncoder and initializing with
	// existing slices is less memory intense than using the fullNode type.
	fullnodeEncoder struct {
		Children [17][]byte
	}

	// extNodeEncoder is a type used exclusively for encoding extension node.
	// Briefly instantiating a extNodeEncoder and initializing with existing
	// slices is less memory intense than using the shortNode type.
	extNodeEncoder struct {
		Key []byte
		Val []byte
	}

	// leafNodeEncoder is a type used exclusively for encoding leaf node.
	leafNodeEncoder struct {
		Key []byte
		Val []byte
	}
)

// nilValueNode is used when collapsing internal trie nodes for hashing, since
@@ -89,6 +110,7 @@ func (n *fullNode) fstring(ind string) string {
	}
	return resp + fmt.Sprintf("\n%s] ", ind)
}

func (n *shortNode) fstring(ind string) string {
	return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
}
@@ -99,19 +121,6 @@ func (n valueNode) fstring(ind string) string {
	return fmt.Sprintf("%x ", []byte(n))
}

// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time store them
// in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write(n)
	return err
}

// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
func mustDecodeNode(hash, buf []byte) node {
	n, err := decodeNode(hash, buf)
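As a hedged sketch of why the encoder types pay off (not part of the diff): a branch can be encoded straight from its children's hash/blob slices via fullnodeEncoder.encode (added in node_enc.go below), without first wrapping each child in a hashNode/rawNode interface value. It assumes the hasher's encbuf field and encodedBytes helper from trie/hasher.go:

```
package trie

// encodeBranchExample is a hypothetical helper: it fills a fullnodeEncoder
// with pre-existing child hashes/blobs and RLP-encodes it. No per-child
// interface values (hashNode/rawNode) are created along the way.
func encodeBranchExample(h *hasher, children [17][]byte) []byte {
	var n fullnodeEncoder
	for i, c := range children {
		n.Children[i] = c // nil children are encoded as empty strings
	}
	n.encode(h.encbuf)
	return h.encodedBytes()
}
```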
39 changes: 35 additions & 4 deletions trie/node_enc.go
@@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
	w.ListEnd(offset)
}

func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	for _, c := range n.Children {
		if c == nil {
			w.Write(rlp.EmptyString)
		} else if len(c) < 32 {
			w.Write(c) // rawNode
		} else {
			w.WriteBytes(c) // hashNode
		}
	}
	w.ListEnd(offset)
}

func (n *shortNode) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key)
@@ -51,14 +65,31 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) {
	w.ListEnd(offset)
}

func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key)

	if n.Val == nil {
		w.Write(rlp.EmptyString)
	} else if len(n.Val) < 32 {
		w.Write(n.Val) // rawNode
	} else {
		w.WriteBytes(n.Val) // hashNode
	}
	w.ListEnd(offset)
}

func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key) // Compact format key
	w.WriteBytes(n.Val) // Value node, must be non-nil
	w.ListEnd(offset)
}

func (n hashNode) encode(w rlp.EncoderBuffer) {
	w.WriteBytes(n)
}

func (n valueNode) encode(w rlp.EncoderBuffer) {
	w.WriteBytes(n)
}

func (n rawNode) encode(w rlp.EncoderBuffer) {
	w.Write(n)
}
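A hedged usage sketch of these encoders (the helper itself is hypothetical; rlp.NewEncoderBuffer, EncoderBuffer and ToBytes are go-ethereum's existing RLP buffer APIs): the buffer can be reused across encodings, so producing the [key, value] list needs no fresh scratch buffer per node:

```
package trie

import "github.com/ethereum/go-ethereum/rlp"

// exampleEncodeLeaf is a hypothetical helper showing leafNodeEncoder in use:
// the two-item RLP list [key, value] is written into an EncoderBuffer and the
// resulting bytes are copied out with ToBytes.
func exampleEncodeLeaf(key, val []byte) []byte {
	w := rlp.NewEncoderBuffer(nil)
	n := leafNodeEncoder{Key: key, Val: val}
	n.encode(w)
	return w.ToBytes()
}
```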
73 changes: 51 additions & 22 deletions trie/stacktrie.go
@@ -27,6 +27,7 @@ import (

var (
	stPool = sync.Pool{New: func() any { return new(stNode) }}
	bPool  = newBytesPool(32, 100)
	_      = types.TrieHasher((*StackTrie)(nil))
)

@@ -47,6 +48,8 @@ type StackTrie struct {
	h          *hasher
	last       []byte
	onTrieNode OnTrieNode
	kBuf       []byte // buf space used for hex-key during insertions
	pBuf       []byte // buf space used for path during insertions
}

// NewStackTrie allocates and initializes an empty trie. The committed nodes
@@ -56,6 +59,17 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
		root:       stPool.Get().(*stNode),
		h:          newHasher(false),
		onTrieNode: onTrieNode,
		kBuf:       make([]byte, 64),
		pBuf:       make([]byte, 64),
	}
}

func (t *StackTrie) grow(key []byte) {
	if cap(t.kBuf) < 2*len(key) {
		t.kBuf = make([]byte, 2*len(key))
	}
	if cap(t.pBuf) < 2*len(key) {
		t.pBuf = make([]byte, 2*len(key))
	}
}

@@ -64,7 +78,8 @@ func (t *StackTrie) Update(key, value []byte) error {
	if len(value) == 0 {
		return errors.New("trying to insert empty (deletion)")
	}
	k := t.TrieKey(key)
	t.grow(key)
	k := writeHexKey(t.kBuf, key)
	if bytes.Compare(t.last, k) >= 0 {
		return errors.New("non-ascending key order")
	}
Expand All @@ -73,7 +88,7 @@ func (t *StackTrie) Update(key, value []byte) error {
} else {
t.last = append(t.last[:0], k...) // reuse key slice
}
t.insert(t.root, k, value, nil)
t.insert(t.root, k, value, t.pBuf[:0])
return nil
}

@@ -129,6 +144,12 @@ const (
)

func (n *stNode) reset() *stNode {
	if n.typ == hashedNode {
		// On hashnodes, we 'own' the val: it is guaranteed to be not held
		// by external caller. Hence, when we arrive here, we can put it back
		// into the pool
		bPool.Put(n.val)
	}
	n.key = n.key[:0]
	n.val = nil
	for i := range n.children {
@@ -150,8 +171,12 @@ func (n *stNode) getDiffIndex(key []byte) int {
	return len(n.key)
}

// Helper function to that inserts a (key, value) pair into
// the trie.
// Helper function to that inserts a (key, value) pair into the trie.
//
// - The key is not retained by this method, but always copied if needed.
// - The value is retained by this method, as long as the leaf that it represents
// remains unhashed. However: it is never modified.
// - The path is not retained by this method.
func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
	switch st.typ {
	case branchNode: /* Branch */
@@ -283,7 +308,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {

	case emptyNode: /* Empty */
		st.typ = leafNode
		st.key = key
		st.key = append(st.key, key...) // deep-copy the key as it's volatile
		st.val = value

	case hashedNode:
@@ -318,35 +343,33 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
		return

	case branchNode:
		var nodes fullNode
		var nodes fullnodeEncoder
		for i, child := range st.children {
			if child == nil {
				nodes.Children[i] = nilValueNode
				continue
			}
			t.hash(child, append(path, byte(i)))
			nodes.Children[i] = child.val
		}
		nodes.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

			if len(child.val) < 32 {
				nodes.Children[i] = rawNode(child.val)
			} else {
				nodes.Children[i] = hashNode(child.val)
		for i, child := range st.children {
			if child == nil {
				continue
			}
			st.children[i] = nil
			stPool.Put(child.reset()) // Release child back to pool.
		}
		nodes.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

	case extNode:
		// recursively hash and commit child as the first step
		t.hash(st.children[0], append(path, st.key...))

		// encode the extension node
		n := shortNode{Key: hexToCompactInPlace(st.key)}
		if len(st.children[0].val) < 32 {
			n.Val = rawNode(st.children[0].val)
		} else {
			n.Val = hashNode(st.children[0].val)
		n := extNodeEncoder{
			Key: hexToCompactInPlace(st.key),
			Val: st.children[0].val,
		}
		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()
@@ -356,8 +379,10 @@

	case leafNode:
		st.key = append(st.key, byte(16))
		n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}

		n := leafNodeEncoder{
			Key: hexToCompactInPlace(st.key),
			Val: st.val,
		}
		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

@@ -368,15 +393,19 @@
	st.typ = hashedNode
	st.key = st.key[:0]

	st.val = nil // Release reference to potentially externally held slice.

	// Skip committing the non-root node if the size is smaller than 32 bytes
	// as tiny nodes are always embedded in their parent except root node.
	if len(blob) < 32 && len(path) > 0 {
		st.val = common.CopyBytes(blob)
		st.val = bPool.GetWithSize(len(blob))
		copy(st.val, blob)
		return
	}
	// Write the hash to the 'val'. We allocate a new val here to not mutate
	// input values.
	st.val = t.h.hashData(blob)
	st.val = bPool.GetWithSize(32)
	t.h.hashDataTo(st.val, blob)

	// Invoke the callback it's provided. Notably, the path and blob slices are
	// volatile, please deep-copy the slices in callback if the contents need
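For orientation, a hedged end-to-end sketch of the StackTrie API as of this change (keys and values below are made up): NewStackTrie takes an OnTrieNode(path, hash, blob) callback, Update must be called in strictly ascending key order with non-empty values, and Hash finalizes the root:

```
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	// The callback receives every committed node; path and blob are volatile,
	// so deep-copy them if they must outlive the call.
	st := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
		fmt.Printf("node %x at path %x (%d bytes)\n", hash, path, len(blob))
	})

	// Keys must be inserted in ascending order, and values must be non-empty.
	st.Update([]byte{0x01}, []byte("a")) // hypothetical key/value
	st.Update([]byte{0x02}, []byte("b"))

	fmt.Println("root:", st.Hash())
}
```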