github.com/ethw3/go-ethereuma@v0.0.0-20221013053120-c14602a4c23c/trie/sync.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  
    23  	"github.com/ethw3/go-ethereuma/common"
    24  	"github.com/ethw3/go-ethereuma/common/prque"
    25  	"github.com/ethw3/go-ethereuma/core/rawdb"
    26  	"github.com/ethw3/go-ethereuma/ethdb"
    27  	"github.com/ethw3/go-ethereuma/log"
    28  )
    29  
    30  // ErrNotRequested is returned by the trie sync when it's requested to process a
    31  // node it did not request.
    32  var ErrNotRequested = errors.New("not requested")
    33  
    34  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    35  // node it already processed previously.
    36  var ErrAlreadyProcessed = errors.New("already processed")
    37  
    38  // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
    39  // role of this value is to limit the number of trie nodes that get expanded in
    40  // memory if the node was configured with a significant number of peers.
    41  const maxFetchesPerDepth = 16384
    42  
// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
// Content wise the tuple has either 1 element, if it addresses a node in a
// single trie, or 2 elements, if it addresses a node in a stacked trie.
//
// To support addressing arbitrary trie nodes, the path needs to support odd
// nibble lengths. To avoid transferring the expanded hex form over the network,
// the last part of the tuple (which needs to index into the middle of a trie) is
// compact encoded. In case of a 2-tuple, the first item is always 32 bytes, so
// that one is simply binary encoded.
//
// Examples:
//   - Path 0x9  -> {0x19}
//   - Path 0x99 -> {0x0099}
//   - Path 0x01234567890123456789012345678901012345678901234567890123456789019  -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
//   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
type SyncPath [][]byte
    61  
    62  // NewSyncPath converts an expanded trie path from nibble form into a compact
    63  // version that can be sent over the network.
    64  func NewSyncPath(path []byte) SyncPath {
    65  	// If the hash is from the account trie, append a single item, if it
    66  	// is from the a storage trie, append a tuple. Note, the length 64 is
    67  	// clashing between account leaf and storage root. It's fine though
    68  	// because having a trie node at 64 depth means a hash collision was
    69  	// found and we're long dead.
    70  	if len(path) < 64 {
    71  		return SyncPath{hexToCompact(path)}
    72  	}
    73  	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
    74  }
    75  
// nodeRequest represents a scheduled or already in-flight trie node retrieval request.
//
// A request is kept in Sync.nodeReqs, keyed by its path, from the moment it is
// scheduled until commitNodeRequest moves its data into the membatch. The deps
// counter is raised for every child node request, sub-trie or code entry that
// hangs off this node and lowered again as each of those is committed; the
// node itself is committed once the counter drops back to zero.
type nodeRequest struct {
	hash common.Hash // Hash of the trie node to retrieve
	path []byte      // Merkle path (hex nibbles) leading to this node, used for prioritization
	data []byte      // Data content of the node, cached until all subtrees complete; nil until delivered

	parent   *nodeRequest // Parent state node referencing this entry, nil if there is none
	deps     int          // Number of dependencies before allowed to commit this node
	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}
    86  
// codeRequest represents a scheduled or already in-flight bytecode retrieval request.
//
// Requests are kept in Sync.codeReqs keyed by code hash. Several trie nodes may
// reference the same bytecode, so a single request can carry multiple parents.
type codeRequest struct {
	hash    common.Hash    // Hash of the contract bytecode to retrieve
	path    []byte         // Merkle path leading to the referencing entry, used for prioritization
	data    []byte         // Data content of the bytecode; nil until delivered
	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
}
    94  
// NodeSyncResult is a response with requested trie node along with its node path.
// ProcessNode uses Path to look up the pending request the data belongs to.
type NodeSyncResult struct {
	Path string // Path of the originally unknown trie node
	Data []byte // Data content of the retrieved trie node
}
   100  
// CodeSyncResult is a response with requested bytecode along with its hash.
// ProcessCode uses Hash to look up the pending request the data belongs to.
type CodeSyncResult struct {
	Hash common.Hash // Hash of the originally unknown bytecode
	Data []byte      // Data content of the retrieved bytecode
}
   106  
// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
//
// The nodes and hashes maps share the same key (the node path as a string) and
// are written together by commitNodeRequest; Commit reads both to persist each
// node under its hash.
type syncMemBatch struct {
	nodes  map[string][]byte      // In-memory membatch of recently completed nodes, keyed by path
	hashes map[string]common.Hash // Hashes of recently completed nodes, keyed by path
	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes, keyed by code hash
}
   114  
   115  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
   116  func newSyncMemBatch() *syncMemBatch {
   117  	return &syncMemBatch{
   118  		nodes:  make(map[string][]byte),
   119  		hashes: make(map[string]common.Hash),
   120  		codes:  make(map[common.Hash][]byte),
   121  	}
   122  }
   123  
   124  // hasNode reports the trie node with specific path is already cached.
   125  func (batch *syncMemBatch) hasNode(path []byte) bool {
   126  	_, ok := batch.nodes[string(path)]
   127  	return ok
   128  }
   129  
   130  // hasCode reports the contract code with specific hash is already cached.
   131  func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
   132  	_, ok := batch.codes[hash]
   133  	return ok
   134  }
   135  
// Sync is the main state trie synchronisation scheduler, which provides yet
// unknown trie hashes to retrieve, accepts node data associated with said hashes
// and reconstructs the trie step by step until all is done.
//
// Typical flow: Missing hands out work, ProcessNode / ProcessCode accept the
// retrieved data, and Commit flushes completed items into a database batch.
//
// NOTE(review): no locking is visible in this type; presumably callers must
// serialise access to a Sync instance - confirm against the call sites.
type Sync struct {
	database ethdb.KeyValueReader         // Persistent database to check for existing entries
	membatch *syncMemBatch                // Memory buffer to avoid frequent database writes
	nodeReqs map[string]*nodeRequest      // Pending requests pertaining to a trie node path
	codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash
	queue    *prque.Prque                 // Priority queue with the pending requests (node paths and code hashes)
	fetches  map[int]int                  // Number of active fetches per trie node depth
}
   147  
   148  // NewSync creates a new trie data download scheduler.
   149  func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback) *Sync {
   150  	ts := &Sync{
   151  		database: database,
   152  		membatch: newSyncMemBatch(),
   153  		nodeReqs: make(map[string]*nodeRequest),
   154  		codeReqs: make(map[common.Hash]*codeRequest),
   155  		queue:    prque.New(nil),
   156  		fetches:  make(map[int]int),
   157  	}
   158  	ts.AddSubTrie(root, nil, common.Hash{}, nil, callback)
   159  	return ts
   160  }
   161  
   162  // AddSubTrie registers a new trie to the sync code, rooted at the designated
   163  // parent for completion tracking. The given path is a unique node path in
   164  // hex format and contain all the parent path if it's layered trie node.
   165  func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) {
   166  	// Short circuit if the trie is empty or already known
   167  	if root == emptyRoot {
   168  		return
   169  	}
   170  	if s.membatch.hasNode(path) {
   171  		return
   172  	}
   173  	if rawdb.HasTrieNode(s.database, root) {
   174  		return
   175  	}
   176  	// Assemble the new sub-trie sync request
   177  	req := &nodeRequest{
   178  		hash:     root,
   179  		path:     path,
   180  		callback: callback,
   181  	}
   182  	// If this sub-trie has a designated parent, link them together
   183  	if parent != (common.Hash{}) {
   184  		ancestor := s.nodeReqs[string(parentPath)]
   185  		if ancestor == nil {
   186  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   187  		}
   188  		ancestor.deps++
   189  		req.parent = ancestor
   190  	}
   191  	s.scheduleNodeRequest(req)
   192  }
   193  
   194  // AddCodeEntry schedules the direct retrieval of a contract code that should not
   195  // be interpreted as a trie node, but rather accepted and stored into the database
   196  // as is.
   197  func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) {
   198  	// Short circuit if the entry is empty or already known
   199  	if hash == emptyState {
   200  		return
   201  	}
   202  	if s.membatch.hasCode(hash) {
   203  		return
   204  	}
   205  	// If database says duplicate, the blob is present for sure.
   206  	// Note we only check the existence with new code scheme, fast
   207  	// sync is expected to run with a fresh new node. Even there
   208  	// exists the code with legacy format, fetch and store with
   209  	// new scheme anyway.
   210  	if rawdb.HasCodeWithPrefix(s.database, hash) {
   211  		return
   212  	}
   213  	// Assemble the new sub-trie sync request
   214  	req := &codeRequest{
   215  		path: path,
   216  		hash: hash,
   217  	}
   218  	// If this sub-trie has a designated parent, link them together
   219  	if parent != (common.Hash{}) {
   220  		ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq
   221  		if ancestor == nil {
   222  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   223  		}
   224  		ancestor.deps++
   225  		req.parents = append(req.parents, ancestor)
   226  	}
   227  	s.scheduleCodeRequest(req)
   228  }
   229  
   230  // Missing retrieves the known missing nodes from the trie for retrieval. To aid
   231  // both eth/6x style fast sync and snap/1x style state sync, the paths of trie
   232  // nodes are returned too, as well as separate hash list for codes.
   233  func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) {
   234  	var (
   235  		nodePaths  []string
   236  		nodeHashes []common.Hash
   237  		codeHashes []common.Hash
   238  	)
   239  	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
   240  		// Retrieve the next item in line
   241  		item, prio := s.queue.Peek()
   242  
   243  		// If we have too many already-pending tasks for this depth, throttle
   244  		depth := int(prio >> 56)
   245  		if s.fetches[depth] > maxFetchesPerDepth {
   246  			break
   247  		}
   248  		// Item is allowed to be scheduled, add it to the task list
   249  		s.queue.Pop()
   250  		s.fetches[depth]++
   251  
   252  		switch item := item.(type) {
   253  		case common.Hash:
   254  			codeHashes = append(codeHashes, item)
   255  		case string:
   256  			req, ok := s.nodeReqs[item]
   257  			if !ok {
   258  				log.Error("Missing node request", "path", item)
   259  				continue // System very wrong, shouldn't happen
   260  			}
   261  			nodePaths = append(nodePaths, item)
   262  			nodeHashes = append(nodeHashes, req.hash)
   263  		}
   264  	}
   265  	return nodePaths, nodeHashes, codeHashes
   266  }
   267  
   268  // ProcessCode injects the received data for requested item. Note it can
   269  // happpen that the single response commits two pending requests(e.g.
   270  // there are two requests one for code and one for node but the hash
   271  // is same). In this case the second response for the same hash will
   272  // be treated as "non-requested" item or "already-processed" item but
   273  // there is no downside.
   274  func (s *Sync) ProcessCode(result CodeSyncResult) error {
   275  	// If the code was not requested or it's already processed, bail out
   276  	req := s.codeReqs[result.Hash]
   277  	if req == nil {
   278  		return ErrNotRequested
   279  	}
   280  	if req.data != nil {
   281  		return ErrAlreadyProcessed
   282  	}
   283  	req.data = result.Data
   284  	return s.commitCodeRequest(req)
   285  }
   286  
   287  // ProcessNode injects the received data for requested item. Note it can
   288  // happen that the single response commits two pending requests(e.g.
   289  // there are two requests one for code and one for node but the hash
   290  // is same). In this case the second response for the same hash will
   291  // be treated as "non-requested" item or "already-processed" item but
   292  // there is no downside.
   293  func (s *Sync) ProcessNode(result NodeSyncResult) error {
   294  	// If the trie node was not requested or it's already processed, bail out
   295  	req := s.nodeReqs[result.Path]
   296  	if req == nil {
   297  		return ErrNotRequested
   298  	}
   299  	if req.data != nil {
   300  		return ErrAlreadyProcessed
   301  	}
   302  	// Decode the node data content and update the request
   303  	node, err := decodeNode(req.hash.Bytes(), result.Data)
   304  	if err != nil {
   305  		return err
   306  	}
   307  	req.data = result.Data
   308  
   309  	// Create and schedule a request for all the children nodes
   310  	requests, err := s.children(req, node)
   311  	if err != nil {
   312  		return err
   313  	}
   314  	if len(requests) == 0 && req.deps == 0 {
   315  		s.commitNodeRequest(req)
   316  	} else {
   317  		req.deps += len(requests)
   318  		for _, child := range requests {
   319  			s.scheduleNodeRequest(child)
   320  		}
   321  	}
   322  	return nil
   323  }
   324  
   325  // Commit flushes the data stored in the internal membatch out to persistent
   326  // storage, returning any occurred error.
   327  func (s *Sync) Commit(dbw ethdb.Batch) error {
   328  	// Dump the membatch into a database dbw
   329  	for path, value := range s.membatch.nodes {
   330  		rawdb.WriteTrieNode(dbw, s.membatch.hashes[path], value)
   331  	}
   332  	for hash, value := range s.membatch.codes {
   333  		rawdb.WriteCode(dbw, hash, value)
   334  	}
   335  	// Drop the membatch data and return
   336  	s.membatch = newSyncMemBatch()
   337  	return nil
   338  }
   339  
   340  // Pending returns the number of state entries currently pending for download.
   341  func (s *Sync) Pending() int {
   342  	return len(s.nodeReqs) + len(s.codeReqs)
   343  }
   344  
   345  // schedule inserts a new state retrieval request into the fetch queue. If there
   346  // is already a pending request for this node, the new request will be discarded
   347  // and only a parent reference added to the old one.
   348  func (s *Sync) scheduleNodeRequest(req *nodeRequest) {
   349  	s.nodeReqs[string(req.path)] = req
   350  
   351  	// Schedule the request for future retrieval. This queue is shared
   352  	// by both node requests and code requests.
   353  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   354  	for i := 0; i < 14 && i < len(req.path); i++ {
   355  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   356  	}
   357  	s.queue.Push(string(req.path), prio)
   358  }
   359  
   360  // schedule inserts a new state retrieval request into the fetch queue. If there
   361  // is already a pending request for this node, the new request will be discarded
   362  // and only a parent reference added to the old one.
   363  func (s *Sync) scheduleCodeRequest(req *codeRequest) {
   364  	// If we're already requesting this node, add a new reference and stop
   365  	if old, ok := s.codeReqs[req.hash]; ok {
   366  		old.parents = append(old.parents, req.parents...)
   367  		return
   368  	}
   369  	s.codeReqs[req.hash] = req
   370  
   371  	// Schedule the request for future retrieval. This queue is shared
   372  	// by both node requests and code requests.
   373  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   374  	for i := 0; i < 14 && i < len(req.path); i++ {
   375  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   376  	}
   377  	s.queue.Push(req.hash, prio)
   378  }
   379  
   380  // children retrieves all the missing children of a state trie entry for future
   381  // retrieval scheduling.
   382  func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
   383  	// Gather all the children of the node, irrelevant whether known or not
   384  	type child struct {
   385  		path []byte
   386  		node node
   387  	}
   388  	var children []child
   389  
   390  	switch node := (object).(type) {
   391  	case *shortNode:
   392  		key := node.Key
   393  		if hasTerm(key) {
   394  			key = key[:len(key)-1]
   395  		}
   396  		children = []child{{
   397  			node: node.Val,
   398  			path: append(append([]byte(nil), req.path...), key...),
   399  		}}
   400  	case *fullNode:
   401  		for i := 0; i < 17; i++ {
   402  			if node.Children[i] != nil {
   403  				children = append(children, child{
   404  					node: node.Children[i],
   405  					path: append(append([]byte(nil), req.path...), byte(i)),
   406  				})
   407  			}
   408  		}
   409  	default:
   410  		panic(fmt.Sprintf("unknown node: %+v", node))
   411  	}
   412  	// Iterate over the children, and request all unknown ones
   413  	requests := make([]*nodeRequest, 0, len(children))
   414  	for _, child := range children {
   415  		// Notify any external watcher of a new key/value node
   416  		if req.callback != nil {
   417  			if node, ok := (child.node).(valueNode); ok {
   418  				var paths [][]byte
   419  				if len(child.path) == 2*common.HashLength {
   420  					paths = append(paths, hexToKeybytes(child.path))
   421  				} else if len(child.path) == 4*common.HashLength {
   422  					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
   423  					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
   424  				}
   425  				if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil {
   426  					return nil, err
   427  				}
   428  			}
   429  		}
   430  		// If the child references another node, resolve or schedule
   431  		if node, ok := (child.node).(hashNode); ok {
   432  			// Try to resolve the node from the local database
   433  			if s.membatch.hasNode(child.path) {
   434  				continue
   435  			}
   436  			// If database says duplicate, then at least the trie node is present
   437  			// and we hold the assumption that it's NOT legacy contract code.
   438  			chash := common.BytesToHash(node)
   439  			if rawdb.HasTrieNode(s.database, chash) {
   440  				continue
   441  			}
   442  			// Locally unknown node, schedule for retrieval
   443  			requests = append(requests, &nodeRequest{
   444  				path:     child.path,
   445  				hash:     chash,
   446  				parent:   req,
   447  				callback: req.callback,
   448  			})
   449  		}
   450  	}
   451  	return requests, nil
   452  }
   453  
   454  // commit finalizes a retrieval request and stores it into the membatch. If any
   455  // of the referencing parent requests complete due to this commit, they are also
   456  // committed themselves.
   457  func (s *Sync) commitNodeRequest(req *nodeRequest) error {
   458  	// Write the node content to the membatch
   459  	s.membatch.nodes[string(req.path)] = req.data
   460  	s.membatch.hashes[string(req.path)] = req.hash
   461  
   462  	delete(s.nodeReqs, string(req.path))
   463  	s.fetches[len(req.path)]--
   464  
   465  	// Check parent for completion
   466  	if req.parent != nil {
   467  		req.parent.deps--
   468  		if req.parent.deps == 0 {
   469  			if err := s.commitNodeRequest(req.parent); err != nil {
   470  				return err
   471  			}
   472  		}
   473  	}
   474  	return nil
   475  }
   476  
   477  // commit finalizes a retrieval request and stores it into the membatch. If any
   478  // of the referencing parent requests complete due to this commit, they are also
   479  // committed themselves.
   480  func (s *Sync) commitCodeRequest(req *codeRequest) error {
   481  	// Write the node content to the membatch
   482  	s.membatch.codes[req.hash] = req.data
   483  	delete(s.codeReqs, req.hash)
   484  	s.fetches[len(req.path)]--
   485  
   486  	// Check all parents for completion
   487  	for _, parent := range req.parents {
   488  		parent.deps--
   489  		if parent.deps == 0 {
   490  			if err := s.commitNodeRequest(parent); err != nil {
   491  				return err
   492  			}
   493  		}
   494  	}
   495  	return nil
   496  }