github.com/ethereum/go-ethereum@v1.16.1/eth/protocols/snap/gentrie.go (about)

     1  // Copyright 2024 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package snap
    18  
    19  import (
    20  	"bytes"
    21  
    22  	"github.com/ethereum/go-ethereum/common"
    23  	"github.com/ethereum/go-ethereum/core/rawdb"
    24  	"github.com/ethereum/go-ethereum/ethdb"
    25  	"github.com/ethereum/go-ethereum/trie"
    26  )
    27  
// genTrie interface is used by the snap syncer to generate merkle tree nodes
// based on a received batch of states. Two implementations live in this file:
// pathTrie (path mode) and hashTrie (hash mode).
type genTrie interface {
	// update inserts the state item into generator trie.
	update(key, value []byte) error

	// delete removes the state item from the generator trie. An
	// implementation may treat this as a no-op (hashTrie does).
	delete(key []byte) error

	// commit flushes the right boundary nodes if complete flag is true. This
	// function must be called before flushing the associated database batch.
	//
	// The returned hash is only meaningful for a complete commit; the
	// implementations in this file return the zero hash otherwise.
	commit(complete bool) common.Hash
}
    41  
// pathTrie is a wrapper over the stackTrie that adds the extra logic needed
// to handle a semi-completed trie and potential leftover dangling nodes in
// the database. It is utilized for constructing the merkle tree nodes in
// path mode during the snap sync process.
type pathTrie struct {
	owner common.Hash     // identifier of trie owner, empty for account trie
	tr    *trie.StackTrie // underlying raw stack trie
	first []byte          // the path of first committed node by stackTrie; only recorded when skipLeftBoundary is set
	last  []byte          // the path of last node written to the batch; the buffer is reused across commits

	// This flag indicates whether nodes on the left boundary are skipped for
	// committing. If set, the left boundary nodes are considered incomplete
	// due to potentially missing left children. Besides the constructor
	// argument, the flag is also forced on by delete.
	skipLeftBoundary bool
	db               ethdb.KeyValueReader // consulted to check whether a node exists before deleting it
	batch            ethdb.Batch          // receives all node writes and deletions
}
    59  
    60  // newPathTrie initializes the path trie.
    61  func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueReader, batch ethdb.Batch) *pathTrie {
    62  	tr := &pathTrie{
    63  		owner:            owner,
    64  		skipLeftBoundary: skipLeftBoundary,
    65  		db:               db,
    66  		batch:            batch,
    67  	}
    68  	tr.tr = trie.NewStackTrie(tr.onTrieNode)
    69  	return tr
    70  }
    71  
    72  // onTrieNode is invoked whenever a new node is committed by the stackTrie.
    73  //
    74  // As the committed nodes might be incomplete if they are on the boundaries
    75  // (left or right), this function has the ability to detect the incomplete
    76  // ones and filter them out for committing.
    77  //
    78  // Additionally, the assumption is made that there may exist leftover dangling
    79  // nodes in the database. This function has the ability to detect the dangling
    80  // nodes that fall within the path space of committed nodes (specifically on
    81  // the path covered by internal extension nodes) and remove them from the
    82  // database. This property ensures that the entire path space is uniquely
    83  // occupied by committed nodes.
    84  //
    85  // Furthermore, all leftover dangling nodes along the path from committed nodes
    86  // to the trie root (left and right boundaries) should be removed as well;
    87  // otherwise, they might potentially disrupt the state healing process.
    88  func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
    89  	// Filter out the nodes on the left boundary if skipLeftBoundary is
    90  	// configured. Nodes are considered to be on the left boundary if
    91  	// it's the first one to be committed, or the parent/ancestor of the
    92  	// first committed node.
    93  	if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) {
    94  		if t.first == nil {
    95  			// Memorize the path of first committed node, which is regarded
    96  			// as left boundary. Deep-copy is necessary as the path given
    97  			// is volatile.
    98  			t.first = append([]byte{}, path...)
    99  
   100  			// The left boundary can be uniquely determined by the first committed node
   101  			// from stackTrie (e.g., N_1), as the shared path prefix between the first
   102  			// two inserted state items is deterministic (the path of N_3). The path
   103  			// from trie root towards the first committed node is considered the left
   104  			// boundary. The potential leftover dangling nodes on left boundary should
   105  			// be cleaned out.
   106  			//
   107  			//                            +-----+
   108  			//                            | N_3 | shared path prefix of state_1 and state_2
   109  			//                            +-----+
   110  			//                            /-   -\
   111  			//                       +-----+   +-----+
   112  			// First committed node  | N_1 |   | N_2 | latest inserted node (contain state_2)
   113  			//                       +-----+   +-----+
   114  			//
   115  			// The node with the path of the first committed one (e.g, N_1) is not
   116  			// removed because it's a sibling of the nodes we want to commit, not
   117  			// the parent or ancestor.
   118  			for i := 0; i < len(path); i++ {
   119  				t.deleteNode(path[:i], false)
   120  			}
   121  		}
   122  		return
   123  	}
   124  	// If boundary filtering is not configured, or the node is not on the left
   125  	// boundary, commit it to database.
   126  	//
   127  	// Note: If the current committed node is an extension node, then the nodes
   128  	// falling within the path between itself and its standalone (not embedded
   129  	// in parent) child should be cleaned out for exclusively occupy the inner
   130  	// path.
   131  	//
   132  	// This is essential in snap sync to avoid leaving dangling nodes within
   133  	// this range covered by extension node which could potentially break the
   134  	// state healing.
   135  	//
   136  	// The extension node is detected if its path is the prefix of last committed
   137  	// one and path gap is larger than one. If the path gap is only one byte,
   138  	// the current node could either be a full node, or an extension with single
   139  	// byte key. In either case, no gaps will be left in the path.
   140  	if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
   141  		for i := len(path) + 1; i < len(t.last); i++ {
   142  			t.deleteNode(t.last[:i], true)
   143  		}
   144  	}
   145  	t.write(path, blob)
   146  
   147  	// Update the last flag. Deep-copy is necessary as the provided path is volatile.
   148  	if t.last == nil {
   149  		t.last = append([]byte{}, path...)
   150  	} else {
   151  		t.last = append(t.last[:0], path...)
   152  	}
   153  }
   154  
   155  // write commits the node write to provided database batch in path mode.
   156  func (t *pathTrie) write(path []byte, blob []byte) {
   157  	if t.owner == (common.Hash{}) {
   158  		rawdb.WriteAccountTrieNode(t.batch, path, blob)
   159  	} else {
   160  		rawdb.WriteStorageTrieNode(t.batch, t.owner, path, blob)
   161  	}
   162  }
   163  
   164  func (t *pathTrie) deleteAccountNode(path []byte, inner bool) {
   165  	if inner {
   166  		accountInnerLookupGauge.Inc(1)
   167  	} else {
   168  		accountOuterLookupGauge.Inc(1)
   169  	}
   170  	if !rawdb.HasAccountTrieNode(t.db, path) {
   171  		return
   172  	}
   173  	if inner {
   174  		accountInnerDeleteGauge.Inc(1)
   175  	} else {
   176  		accountOuterDeleteGauge.Inc(1)
   177  	}
   178  	rawdb.DeleteAccountTrieNode(t.batch, path)
   179  }
   180  
   181  func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
   182  	if inner {
   183  		storageInnerLookupGauge.Inc(1)
   184  	} else {
   185  		storageOuterLookupGauge.Inc(1)
   186  	}
   187  	if !rawdb.HasStorageTrieNode(t.db, t.owner, path) {
   188  		return
   189  	}
   190  	if inner {
   191  		storageInnerDeleteGauge.Inc(1)
   192  	} else {
   193  		storageOuterDeleteGauge.Inc(1)
   194  	}
   195  	rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
   196  }
   197  
   198  // deleteNode commits the node deletion to provided database batch in path mode.
   199  func (t *pathTrie) deleteNode(path []byte, inner bool) {
   200  	if t.owner == (common.Hash{}) {
   201  		t.deleteAccountNode(path, inner)
   202  	} else {
   203  		t.deleteStorageNode(path, inner)
   204  	}
   205  }
   206  
   207  // update implements genTrie interface, inserting a (key, value) pair into the
   208  // stack trie.
   209  func (t *pathTrie) update(key, value []byte) error {
   210  	return t.tr.Update(key, value)
   211  }
   212  
   213  // delete implements genTrie interface, deleting the item from the stack trie.
   214  func (t *pathTrie) delete(key []byte) error {
   215  	// Commit the trie since the right boundary is incomplete because
   216  	// of the deleted item. This will implicitly discard the last inserted
   217  	// item and clean some ancestor trie nodes of the last committed
   218  	// item in the database.
   219  	t.commit(false)
   220  
   221  	// Reset the trie and all the internal trackers
   222  	t.first = nil
   223  	t.last = nil
   224  	t.tr.Reset()
   225  
   226  	// Explicitly mark the left boundary as incomplete, as the left-side
   227  	// item of the next one has been deleted. Be aware that the next item
   228  	// to be inserted will be ignored from committing as well as it's on
   229  	// the left boundary.
   230  	t.skipLeftBoundary = true
   231  
   232  	// Explicitly delete the potential leftover nodes on the specific
   233  	// path from the database.
   234  	tkey := t.tr.TrieKey(key)
   235  	for i := 0; i <= len(tkey); i++ {
   236  		t.deleteNode(tkey[:i], false)
   237  	}
   238  	return nil
   239  }
   240  
   241  // commit implements genTrie interface, flushing the right boundary if it's
   242  // considered as complete. Otherwise, the nodes on the right boundary are
   243  // discarded and cleaned up.
   244  //
   245  // Note, this function must be called before flushing database batch, otherwise,
   246  // dangling nodes might be left in database.
   247  func (t *pathTrie) commit(complete bool) common.Hash {
   248  	// If the right boundary is claimed as complete, flush them out.
   249  	// The nodes on both left and right boundary will still be filtered
   250  	// out if left boundary filtering is configured.
   251  	if complete {
   252  		// Commit all inserted but not yet committed nodes(on the right
   253  		// boundary) in the stackTrie.
   254  		hash := t.tr.Hash()
   255  		if t.skipLeftBoundary {
   256  			return common.Hash{} // hash is meaningless if left side is incomplete
   257  		}
   258  		return hash
   259  	}
   260  	// Discard nodes on the right boundary as it's claimed as incomplete. These
   261  	// nodes might be incomplete due to missing children on the right side.
   262  	// Furthermore, the potential leftover nodes on right boundary should also
   263  	// be cleaned out.
   264  	//
   265  	// The right boundary can be uniquely determined by the last committed node
   266  	// from stackTrie (e.g., N_1), as the shared path prefix between the last
   267  	// two inserted state items is deterministic (the path of N_3). The path
   268  	// from trie root towards the last committed node is considered the right
   269  	// boundary (root to N_3).
   270  	//
   271  	//                           +-----+
   272  	//                           | N_3 | shared path prefix of last two states
   273  	//                           +-----+
   274  	//                           /-   -\
   275  	//                      +-----+   +-----+
   276  	// Last committed node  | N_1 |   | N_2 | latest inserted node  (contain last state)
   277  	//                      +-----+   +-----+
   278  	//
   279  	// Another interesting scenario occurs when the trie is committed due to
   280  	// too many items being accumulated in the batch. To flush them out to
   281  	// the database, the path of the last inserted node (N_2) is temporarily
   282  	// treated as an incomplete right boundary, and nodes on this path are
   283  	// removed (e.g. from root to N_3).
   284  	// However, this path will be reclaimed as an internal path by inserting
   285  	// more items after the batch flush. New nodes on this path can be committed
   286  	// with no issues as they are actually complete. Also, from a database
   287  	// perspective, first deleting and then rewriting is a valid data update.
   288  	for i := 0; i < len(t.last); i++ {
   289  		t.deleteNode(t.last[:i], false)
   290  	}
   291  	return common.Hash{} // the hash is meaningless for incomplete commit
   292  }
   293  
// hashTrie is a wrapper over the stackTrie for implementing genTrie interface.
// Unlike pathTrie it performs no boundary filtering or cleanup of its own.
type hashTrie struct {
	tr *trie.StackTrie // underlying stack trie; its commit callback is installed by newHashTrie
}
   298  
   299  // newHashTrie initializes the hash trie.
   300  func newHashTrie(batch ethdb.Batch) *hashTrie {
   301  	return &hashTrie{tr: trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
   302  		rawdb.WriteLegacyTrieNode(batch, hash, blob)
   303  	})}
   304  }
   305  
   306  // update implements genTrie interface, inserting a (key, value) pair into
   307  // the stack trie.
   308  func (t *hashTrie) update(key, value []byte) error {
   309  	return t.tr.Update(key, value)
   310  }
   311  
   312  // delete implements genTrie interface, ignoring the state item for deleting.
   313  func (t *hashTrie) delete(key []byte) error { return nil }
   314  
   315  // commit implements genTrie interface, committing the nodes on right boundary.
   316  func (t *hashTrie) commit(complete bool) common.Hash {
   317  	if !complete {
   318  		return common.Hash{} // the hash is meaningless for incomplete commit
   319  	}
   320  	return t.tr.Hash() // return hash only if it's claimed as complete
   321  }