trie: reduce allocations in stacktrie (#30743)
This PR uses various tweaks and tricks to make the stacktrie nearly allocation-free.

```
[user@work go-ethereum]$ benchstat stacktrie.1 stacktrie.7
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/trie
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
             │ stacktrie.1  │             stacktrie.7              │
             │    sec/op    │    sec/op     vs base                │
Insert100K-8   106.97m ± 8%   88.21m ± 34%  -17.54% (p=0.000 n=10)

             │   stacktrie.1    │             stacktrie.7              │
             │       B/op       │     B/op      vs base                │
Insert100K-8   13199.608Ki ± 0%   3.424Ki ± 3%  -99.97% (p=0.000 n=10)

             │  stacktrie.1   │             stacktrie.7             │
             │   allocs/op    │ allocs/op   vs base                 │
Insert100K-8   553428.50 ± 0%   22.00 ± 5%  -100.00% (p=0.000 n=10)
```
Also improves derivesha:
```
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/core/types
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
                          │ derivesha.1 │             derivesha.2              │
                          │   sec/op    │    sec/op     vs base                │
DeriveSha200/stack_trie-8   477.8µ ± 2%   430.0µ ± 12%  -10.00% (p=0.000 n=10)

                          │ derivesha.1  │             derivesha.2              │
                          │     B/op     │     B/op      vs base                │
DeriveSha200/stack_trie-8   45.17Ki ± 0%   25.65Ki ± 0%  -43.21% (p=0.000 n=10)

                          │ derivesha.1 │            derivesha.2             │
                          │  allocs/op  │ allocs/op   vs base                │
DeriveSha200/stack_trie-8   1259.0 ± 0%   232.0 ± 0%  -81.57% (p=0.000 n=10)

```

---------

Co-authored-by: Gary Rong <[email protected]>
holiman and rjl493456442 authored Jan 23, 2025
1 parent a840e9b commit d3cc618
Showing 7 changed files with 238 additions and 39 deletions.
64 changes: 64 additions & 0 deletions trie/bytepool.go
@@ -0,0 +1,64 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

// bytesPool is a pool for byte slices. It is safe for concurrent use.
type bytesPool struct {
	c chan []byte
	w int
}

// newBytesPool creates a new bytesPool. The sliceCap sets the capacity of
// newly allocated slices, and the nitems determines how many items the pool
// will hold, at maximum.
func newBytesPool(sliceCap, nitems int) *bytesPool {
	return &bytesPool{
		c: make(chan []byte, nitems),
		w: sliceCap,
	}
}

// Get returns a slice. Safe for concurrent use.
func (bp *bytesPool) Get() []byte {
	select {
	case b := <-bp.c:
		return b
	default:
		return make([]byte, 0, bp.w)
	}
}

// GetWithSize returns a slice with specified byte slice size.
func (bp *bytesPool) GetWithSize(s int) []byte {
	b := bp.Get()
	if cap(b) < s {
		return make([]byte, s)
	}
	return b[:s]
}

// Put returns a slice to the pool. Safe for concurrent use. This method
// will ignore slices that are too small or too large (>3x the cap)
func (bp *bytesPool) Put(b []byte) {
	if c := cap(b); c < bp.w || c > 3*bp.w {
		return
	}
	select {
	case bp.c <- b:
	default:
	}
}
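A hedged usage sketch of the pool above (the helper and its hash parameter are illustrative, not part of this PR); since bytesPool is unexported, it would live alongside bytepool.go in package trie:

```
package trie

// examplePoolUsage is a hypothetical illustration of the pool above: slices
// are handed out with a fixed capacity and silently dropped on Put when they
// are the wrong size or the pool is full.
func examplePoolUsage(hash [32]byte) {
	pool := newBytesPool(32, 100) // 32-byte-capacity slices, at most 100 pooled

	buf := pool.GetWithSize(32) // reuses a pooled slice, or allocates a fresh one
	copy(buf, hash[:])          // use it, e.g. as a node hash buffer
	pool.Put(buf)               // return it; too-small/too-large slices are ignored
}
```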
12 changes: 12 additions & 0 deletions trie/encoding.go
@@ -104,6 +104,18 @@ func keybytesToHex(str []byte) []byte {
	return nibbles
}

// writeHexKey writes the hexkey into the given slice.
// OBS! This method omits the termination flag.
// OBS! The dst slice must be at least 2x as large as the key
func writeHexKey(dst []byte, key []byte) []byte {
	_ = dst[2*len(key)-1]
	for i, b := range key {
		dst[i*2] = b / 16
		dst[i*2+1] = b % 16
	}
	return dst[:2*len(key)]
}

// hexToKeybytes turns hex nibbles into key bytes.
// This can only be used for keys of even length.
func hexToKeybytes(hex []byte) []byte {
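A hedged illustration of writeHexKey's nibble layout (the key below is made up): each input byte expands into two nibbles, and unlike keybytesToHex no terminator nibble (16) is appended:

```
package trie

import "fmt"

// exampleWriteHexKey is a hypothetical demonstration of writeHexKey: each key
// byte expands into two nibbles, with no trailing terminator.
func exampleWriteHexKey() {
	key := []byte{0xab, 0xcd}          // hypothetical key
	dst := make([]byte, 2*len(key))    // must be at least 2x the key length
	fmt.Println(writeHexKey(dst, key)) // prints [10 11 12 13]
}
```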
8 changes: 8 additions & 0 deletions trie/hasher.go
@@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode {
	return n
}

// hashDataTo hashes the provided data to the given destination buffer. The caller
// must ensure that the dst buffer is of appropriate size.
func (h *hasher) hashDataTo(dst, data []byte) {
	h.sha.Reset()
	h.sha.Write(data)
	h.sha.Read(dst)
}

// proofHash is used to construct trie proofs, and returns the 'collapsed'
// node (for later RLP encoding) as well as the hashed node -- unless the
// node is smaller than 32 bytes, in which case it will be returned as is.
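For context, the same read-into-buffer pattern can be sketched standalone, assuming golang.org/x/crypto/sha3, whose legacy Keccak-256 state also implements io.Reader; reading the sponge into a caller-provided buffer avoids the fresh slice that Sum(nil) would return:

```
package main

import (
	"fmt"
	"io"

	"golang.org/x/crypto/sha3"
)

// keccakState is the subset of the sha3 state used here; the concrete type
// returned by NewLegacyKeccak256 implements hash.Hash and io.Reader.
type keccakState interface {
	io.Writer
	io.Reader
	Reset()
}

func main() {
	sha := sha3.NewLegacyKeccak256().(keccakState)
	dst := make([]byte, 32) // reusable destination buffer

	sha.Reset()
	sha.Write([]byte("some node rlp")) // hypothetical input
	sha.Read(dst)                      // hash written in place; no new slice allocated
	fmt.Printf("%x\n", dst)
}
```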
35 changes: 22 additions & 13 deletions trie/node.go
@@ -45,6 +45,27 @@ type (
	}
	hashNode  []byte
	valueNode []byte

	// fullnodeEncoder is a type used exclusively for encoding fullNode.
	// Briefly instantiating a fullnodeEncoder and initializing with
	// existing slices is less memory intense than using the fullNode type.
	fullnodeEncoder struct {
		Children [17][]byte
	}

	// extNodeEncoder is a type used exclusively for encoding extension node.
	// Briefly instantiating a extNodeEncoder and initializing with existing
	// slices is less memory intense than using the shortNode type.
	extNodeEncoder struct {
		Key []byte
		Val []byte
	}

	// leafNodeEncoder is a type used exclusively for encoding leaf node.
	leafNodeEncoder struct {
		Key []byte
		Val []byte
	}
)

// nilValueNode is used when collapsing internal trie nodes for hashing, since
@@ -89,6 +110,7 @@ func (n *fullNode) fstring(ind string) string {
	}
	return resp + fmt.Sprintf("\n%s] ", ind)
}

func (n *shortNode) fstring(ind string) string {
	return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
}
@@ -99,19 +121,6 @@ func (n valueNode) fstring(ind string) string {
	return fmt.Sprintf("%x ", []byte(n))
}

// rawNode is a simple binary blob used to differentiate between collapsed trie
// nodes and already encoded RLP binary blobs (while at the same time store them
// in the same cache fields).
type rawNode []byte

func (n rawNode) cache() (hashNode, bool) { panic("this should never end up in a live trie") }
func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }

func (n rawNode) EncodeRLP(w io.Writer) error {
	_, err := w.Write(n)
	return err
}

// mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
func mustDecodeNode(hash, buf []byte) node {
	n, err := decodeNode(hash, buf)
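As a hedged sketch of why the encoder types pay off (not part of the diff): a branch can be encoded straight from its children's hash/blob slices via fullnodeEncoder.encode (added in node_enc.go below), without first wrapping each child in a hashNode/rawNode interface value. It assumes the hasher's encbuf field and encodedBytes helper from trie/hasher.go:

```
package trie

// encodeBranchExample is a hypothetical helper: it fills a fullnodeEncoder
// with pre-existing child hashes/blobs and RLP-encodes it. No per-child
// interface values (hashNode/rawNode) are created along the way.
func encodeBranchExample(h *hasher, children [17][]byte) []byte {
	var n fullnodeEncoder
	for i, c := range children {
		n.Children[i] = c // nil children are encoded as empty strings
	}
	n.encode(h.encbuf)
	return h.encodedBytes()
}
```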
39 changes: 35 additions & 4 deletions trie/node_enc.go
@@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
	w.ListEnd(offset)
}

func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	for _, c := range n.Children {
		if c == nil {
			w.Write(rlp.EmptyString)
		} else if len(c) < 32 {
			w.Write(c) // rawNode
		} else {
			w.WriteBytes(c) // hashNode
		}
	}
	w.ListEnd(offset)
}

func (n *shortNode) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key)
@@ -51,14 +65,31 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) {
	w.ListEnd(offset)
}

func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key)

	if n.Val == nil {
		w.Write(rlp.EmptyString)
	} else if len(n.Val) < 32 {
		w.Write(n.Val) // rawNode
	} else {
		w.WriteBytes(n.Val) // hashNode
	}
	w.ListEnd(offset)
}

func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key) // Compact format key
	w.WriteBytes(n.Val) // Value node, must be non-nil
	w.ListEnd(offset)
}

func (n hashNode) encode(w rlp.EncoderBuffer) {
	w.WriteBytes(n)
}

func (n valueNode) encode(w rlp.EncoderBuffer) {
	w.WriteBytes(n)
}

func (n rawNode) encode(w rlp.EncoderBuffer) {
	w.Write(n)
}
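A hedged usage sketch of these encoders (the helper itself is hypothetical; rlp.NewEncoderBuffer, EncoderBuffer and ToBytes are go-ethereum's existing RLP buffer APIs): the buffer can be reused across encodings, so producing the [key, value] list needs no fresh scratch buffer per node:

```
package trie

import "github.com/ethereum/go-ethereum/rlp"

// exampleEncodeLeaf is a hypothetical helper showing leafNodeEncoder in use:
// the two-item RLP list [key, value] is written into an EncoderBuffer and the
// resulting bytes are copied out with ToBytes.
func exampleEncodeLeaf(key, val []byte) []byte {
	w := rlp.NewEncoderBuffer(nil)
	n := leafNodeEncoder{Key: key, Val: val}
	n.encode(w)
	return w.ToBytes()
}
```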
73 changes: 51 additions & 22 deletions trie/stacktrie.go
@@ -27,6 +27,7 @@ import (

var (
	stPool = sync.Pool{New: func() any { return new(stNode) }}
	bPool  = newBytesPool(32, 100)
	_      = types.TrieHasher((*StackTrie)(nil))
)

@@ -47,6 +48,8 @@ type StackTrie struct {
	h          *hasher
	last       []byte
	onTrieNode OnTrieNode
	kBuf       []byte // buf space used for hex-key during insertions
	pBuf       []byte // buf space used for path during insertions
}

// NewStackTrie allocates and initializes an empty trie. The committed nodes
@@ -56,6 +59,17 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
		root:       stPool.Get().(*stNode),
		h:          newHasher(false),
		onTrieNode: onTrieNode,
		kBuf:       make([]byte, 64),
		pBuf:       make([]byte, 64),
	}
}

func (t *StackTrie) grow(key []byte) {
	if cap(t.kBuf) < 2*len(key) {
		t.kBuf = make([]byte, 2*len(key))
	}
	if cap(t.pBuf) < 2*len(key) {
		t.pBuf = make([]byte, 2*len(key))
	}
}

@@ -64,7 +78,8 @@ func (t *StackTrie) Update(key, value []byte) error {
	if len(value) == 0 {
		return errors.New("trying to insert empty (deletion)")
	}
	k := t.TrieKey(key)
	t.grow(key)
	k := writeHexKey(t.kBuf, key)
	if bytes.Compare(t.last, k) >= 0 {
		return errors.New("non-ascending key order")
	}
Expand All @@ -73,7 +88,7 @@ func (t *StackTrie) Update(key, value []byte) error {
} else {
t.last = append(t.last[:0], k...) // reuse key slice
}
t.insert(t.root, k, value, nil)
t.insert(t.root, k, value, t.pBuf[:0])
return nil
}

@@ -129,6 +144,12 @@ const (
)

func (n *stNode) reset() *stNode {
	if n.typ == hashedNode {
		// On hashnodes, we 'own' the val: it is guaranteed to be not held
		// by external caller. Hence, when we arrive here, we can put it back
		// into the pool
		bPool.Put(n.val)
	}
	n.key = n.key[:0]
	n.val = nil
	for i := range n.children {
@@ -150,8 +171,12 @@ func (n *stNode) getDiffIndex(key []byte) int {
	return len(n.key)
}

// Helper function to that inserts a (key, value) pair into
// the trie.
// Helper function to that inserts a (key, value) pair into the trie.
//
// - The key is not retained by this method, but always copied if needed.
// - The value is retained by this method, as long as the leaf that it represents
// remains unhashed. However: it is never modified.
// - The path is not retained by this method.
func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
	switch st.typ {
	case branchNode: /* Branch */
@@ -283,7 +308,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {

	case emptyNode: /* Empty */
		st.typ = leafNode
		st.key = key
		st.key = append(st.key, key...) // deep-copy the key as it's volatile
		st.val = value

	case hashedNode:
@@ -318,35 +343,33 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
		return

	case branchNode:
		var nodes fullNode
		var nodes fullnodeEncoder
		for i, child := range st.children {
			if child == nil {
				nodes.Children[i] = nilValueNode
				continue
			}
			t.hash(child, append(path, byte(i)))
			nodes.Children[i] = child.val
		}
		nodes.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

			if len(child.val) < 32 {
				nodes.Children[i] = rawNode(child.val)
			} else {
				nodes.Children[i] = hashNode(child.val)
		for i, child := range st.children {
			if child == nil {
				continue
			}
			st.children[i] = nil
			stPool.Put(child.reset()) // Release child back to pool.
		}
		nodes.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

	case extNode:
		// recursively hash and commit child as the first step
		t.hash(st.children[0], append(path, st.key...))

		// encode the extension node
		n := shortNode{Key: hexToCompactInPlace(st.key)}
		if len(st.children[0].val) < 32 {
			n.Val = rawNode(st.children[0].val)
		} else {
			n.Val = hashNode(st.children[0].val)
		n := extNodeEncoder{
			Key: hexToCompactInPlace(st.key),
			Val: st.children[0].val,
		}
		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()
@@ -356,8 +379,10 @@

	case leafNode:
		st.key = append(st.key, byte(16))
		n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}

		n := leafNodeEncoder{
			Key: hexToCompactInPlace(st.key),
			Val: st.val,
		}
		n.encode(t.h.encbuf)
		blob = t.h.encodedBytes()

@@ -368,15 +393,19 @@
	st.typ = hashedNode
	st.key = st.key[:0]

	st.val = nil // Release reference to potentially externally held slice.

	// Skip committing the non-root node if the size is smaller than 32 bytes
	// as tiny nodes are always embedded in their parent except root node.
	if len(blob) < 32 && len(path) > 0 {
		st.val = common.CopyBytes(blob)
		st.val = bPool.GetWithSize(len(blob))
		copy(st.val, blob)
		return
	}
	// Write the hash to the 'val'. We allocate a new val here to not mutate
	// input values.
	st.val = t.h.hashData(blob)
	st.val = bPool.GetWithSize(32)
	t.h.hashDataTo(st.val, blob)

	// Invoke the callback it's provided. Notably, the path and blob slices are
	// volatile, please deep-copy the slices in callback if the contents need
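For orientation, a hedged end-to-end sketch of the StackTrie API as of this change (keys and values below are made up): NewStackTrie takes an OnTrieNode(path, hash, blob) callback, Update must be called in strictly ascending key order with non-empty values, and Hash finalizes the root:

```
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	// The callback receives every committed node; path and blob are volatile,
	// so deep-copy them if they must outlive the call.
	st := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
		fmt.Printf("node %x at path %x (%d bytes)\n", hash, path, len(blob))
	})

	// Keys must be inserted in ascending order, and values must be non-empty.
	st.Update([]byte{0x01}, []byte("a")) // hypothetical key/value
	st.Update([]byte{0x02}, []byte("b"))

	fmt.Println("root:", st.Hash())
}
```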