github.com/bcnmy/go-ethereum@v1.10.27/trie/sync.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"sync"
    23  
    24  	"github.com/ethereum/go-ethereum/common"
    25  	"github.com/ethereum/go-ethereum/common/prque"
    26  	"github.com/ethereum/go-ethereum/core/rawdb"
    27  	"github.com/ethereum/go-ethereum/ethdb"
    28  	"github.com/ethereum/go-ethereum/log"
    29  )
    30  
    31  // ErrNotRequested is returned by the trie sync when it's requested to process a
    32  // node it did not request.
    33  var ErrNotRequested = errors.New("not requested")
    34  
    35  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    36  // node it already processed previously.
    37  var ErrAlreadyProcessed = errors.New("already processed")
    38  
    39  // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
    40  // role of this value is to limit the number of trie nodes that get expanded in
    41  // memory if the node was configured with a significant number of peers.
    42  const maxFetchesPerDepth = 16384
    43  
    44  // SyncPath is a path tuple identifying a particular trie node either in a single
    45  // trie (account) or a layered trie (account -> storage).
    46  //
    47  // Content wise the tuple either has 1 element if it addresses a node in a single
    48  // trie or 2 elements if it addresses a node in a stacked trie.
    49  //
    50  // To support aiming arbitrary trie nodes, the path needs to support odd nibble
    51  // lengths. To avoid transferring expanded hex form over the network, the last
    52  // part of the tuple (which needs to index into the middle of a trie) is compact
    53  // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is
    54  // simple binary encoded.
    55  //
    56  // Examples:
    57  //   - Path 0x9  -> {0x19}
    58  //   - Path 0x99 -> {0x0099}
    59  //   - Path 0x01234567890123456789012345678901012345678901234567890123456789019  -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
    60  //   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
    61  type SyncPath [][]byte
    62  
    63  // NewSyncPath converts an expanded trie path from nibble form into a compact
    64  // version that can be sent over the network.
    65  func NewSyncPath(path []byte) SyncPath {
    66  	// If the hash is from the account trie, append a single item, if it
    67  	// is from the a storage trie, append a tuple. Note, the length 64 is
    68  	// clashing between account leaf and storage root. It's fine though
    69  	// because having a trie node at 64 depth means a hash collision was
    70  	// found and we're long dead.
    71  	if len(path) < 64 {
    72  		return SyncPath{hexToCompact(path)}
    73  	}
    74  	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
    75  }
    76  
    77  // nodeRequest represents a scheduled or already in-flight trie node retrieval request.
    78  type nodeRequest struct {
    79  	hash common.Hash // Hash of the trie node to retrieve
    80  	path []byte      // Merkle path leading to this node for prioritization
    81  	data []byte      // Data content of the node, cached until all subtrees complete
    82  
    83  	parent   *nodeRequest // Parent state node referencing this entry
    84  	deps     int          // Number of dependencies before allowed to commit this node
    85  	callback LeafCallback // Callback to invoke if a leaf node it reached on this branch
    86  }
    87  
    88  // codeRequest represents a scheduled or already in-flight bytecode retrieval request.
    89  type codeRequest struct {
    90  	hash    common.Hash    // Hash of the contract bytecode to retrieve
    91  	path    []byte         // Merkle path leading to this node for prioritization
    92  	data    []byte         // Data content of the node, cached until all subtrees complete
    93  	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
    94  }
    95  
    96  // NodeSyncResult is a response with requested trie node along with its node path.
    97  type NodeSyncResult struct {
    98  	Path string // Path of the originally unknown trie node
    99  	Data []byte // Data content of the retrieved trie node
   100  }
   101  
   102  // CodeSyncResult is a response with requested bytecode along with its hash.
   103  type CodeSyncResult struct {
   104  	Hash common.Hash // Hash the originally unknown bytecode
   105  	Data []byte      // Data content of the retrieved bytecode
   106  }
   107  
   108  // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
   109  // persisted data items.
   110  type syncMemBatch struct {
   111  	nodes  map[string][]byte      // In-memory membatch of recently completed nodes
   112  	hashes map[string]common.Hash // Hashes of recently completed nodes
   113  	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
   114  }
   115  
   116  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
   117  func newSyncMemBatch() *syncMemBatch {
   118  	return &syncMemBatch{
   119  		nodes:  make(map[string][]byte),
   120  		hashes: make(map[string]common.Hash),
   121  		codes:  make(map[common.Hash][]byte),
   122  	}
   123  }
   124  
   125  // hasNode reports the trie node with specific path is already cached.
   126  func (batch *syncMemBatch) hasNode(path []byte) bool {
   127  	_, ok := batch.nodes[string(path)]
   128  	return ok
   129  }
   130  
   131  // hasCode reports the contract code with specific hash is already cached.
   132  func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
   133  	_, ok := batch.codes[hash]
   134  	return ok
   135  }
   136  
   137  // Sync is the main state trie synchronisation scheduler, which provides yet
   138  // unknown trie hashes to retrieve, accepts node data associated with said hashes
   139  // and reconstructs the trie step by step until all is done.
   140  type Sync struct {
   141  	database ethdb.KeyValueReader         // Persistent database to check for existing entries
   142  	membatch *syncMemBatch                // Memory buffer to avoid frequent database writes
   143  	nodeReqs map[string]*nodeRequest      // Pending requests pertaining to a trie node path
   144  	codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash
   145  	queue    *prque.Prque                 // Priority queue with the pending requests
   146  	fetches  map[int]int                  // Number of active fetches per trie node depth
   147  }
   148  
   149  // NewSync creates a new trie data download scheduler.
   150  func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback) *Sync {
   151  	ts := &Sync{
   152  		database: database,
   153  		membatch: newSyncMemBatch(),
   154  		nodeReqs: make(map[string]*nodeRequest),
   155  		codeReqs: make(map[common.Hash]*codeRequest),
   156  		queue:    prque.New(nil),
   157  		fetches:  make(map[int]int),
   158  	}
   159  	ts.AddSubTrie(root, nil, common.Hash{}, nil, callback)
   160  	return ts
   161  }
   162  
   163  // AddSubTrie registers a new trie to the sync code, rooted at the designated
   164  // parent for completion tracking. The given path is a unique node path in
   165  // hex format and contain all the parent path if it's layered trie node.
   166  func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) {
   167  	// Short circuit if the trie is empty or already known
   168  	if root == emptyRoot {
   169  		return
   170  	}
   171  	if s.membatch.hasNode(path) {
   172  		return
   173  	}
   174  	if rawdb.HasTrieNode(s.database, root) {
   175  		return
   176  	}
   177  	// Assemble the new sub-trie sync request
   178  	req := &nodeRequest{
   179  		hash:     root,
   180  		path:     path,
   181  		callback: callback,
   182  	}
   183  	// If this sub-trie has a designated parent, link them together
   184  	if parent != (common.Hash{}) {
   185  		ancestor := s.nodeReqs[string(parentPath)]
   186  		if ancestor == nil {
   187  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   188  		}
   189  		ancestor.deps++
   190  		req.parent = ancestor
   191  	}
   192  	s.scheduleNodeRequest(req)
   193  }
   194  
   195  // AddCodeEntry schedules the direct retrieval of a contract code that should not
   196  // be interpreted as a trie node, but rather accepted and stored into the database
   197  // as is.
   198  func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) {
   199  	// Short circuit if the entry is empty or already known
   200  	if hash == emptyState {
   201  		return
   202  	}
   203  	if s.membatch.hasCode(hash) {
   204  		return
   205  	}
   206  	// If database says duplicate, the blob is present for sure.
   207  	// Note we only check the existence with new code scheme, fast
   208  	// sync is expected to run with a fresh new node. Even there
   209  	// exists the code with legacy format, fetch and store with
   210  	// new scheme anyway.
   211  	if rawdb.HasCodeWithPrefix(s.database, hash) {
   212  		return
   213  	}
   214  	// Assemble the new sub-trie sync request
   215  	req := &codeRequest{
   216  		path: path,
   217  		hash: hash,
   218  	}
   219  	// If this sub-trie has a designated parent, link them together
   220  	if parent != (common.Hash{}) {
   221  		ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq
   222  		if ancestor == nil {
   223  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   224  		}
   225  		ancestor.deps++
   226  		req.parents = append(req.parents, ancestor)
   227  	}
   228  	s.scheduleCodeRequest(req)
   229  }
   230  
   231  // Missing retrieves the known missing nodes from the trie for retrieval. To aid
   232  // both eth/6x style fast sync and snap/1x style state sync, the paths of trie
   233  // nodes are returned too, as well as separate hash list for codes.
   234  func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) {
   235  	var (
   236  		nodePaths  []string
   237  		nodeHashes []common.Hash
   238  		codeHashes []common.Hash
   239  	)
   240  	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
   241  		// Retrieve the next item in line
   242  		item, prio := s.queue.Peek()
   243  
   244  		// If we have too many already-pending tasks for this depth, throttle
   245  		depth := int(prio >> 56)
   246  		if s.fetches[depth] > maxFetchesPerDepth {
   247  			break
   248  		}
   249  		// Item is allowed to be scheduled, add it to the task list
   250  		s.queue.Pop()
   251  		s.fetches[depth]++
   252  
   253  		switch item := item.(type) {
   254  		case common.Hash:
   255  			codeHashes = append(codeHashes, item)
   256  		case string:
   257  			req, ok := s.nodeReqs[item]
   258  			if !ok {
   259  				log.Error("Missing node request", "path", item)
   260  				continue // System very wrong, shouldn't happen
   261  			}
   262  			nodePaths = append(nodePaths, item)
   263  			nodeHashes = append(nodeHashes, req.hash)
   264  		}
   265  	}
   266  	return nodePaths, nodeHashes, codeHashes
   267  }
   268  
   269  // ProcessCode injects the received data for requested item. Note it can
   270  // happpen that the single response commits two pending requests(e.g.
   271  // there are two requests one for code and one for node but the hash
   272  // is same). In this case the second response for the same hash will
   273  // be treated as "non-requested" item or "already-processed" item but
   274  // there is no downside.
   275  func (s *Sync) ProcessCode(result CodeSyncResult) error {
   276  	// If the code was not requested or it's already processed, bail out
   277  	req := s.codeReqs[result.Hash]
   278  	if req == nil {
   279  		return ErrNotRequested
   280  	}
   281  	if req.data != nil {
   282  		return ErrAlreadyProcessed
   283  	}
   284  	req.data = result.Data
   285  	return s.commitCodeRequest(req)
   286  }
   287  
   288  // ProcessNode injects the received data for requested item. Note it can
   289  // happen that the single response commits two pending requests(e.g.
   290  // there are two requests one for code and one for node but the hash
   291  // is same). In this case the second response for the same hash will
   292  // be treated as "non-requested" item or "already-processed" item but
   293  // there is no downside.
   294  func (s *Sync) ProcessNode(result NodeSyncResult) error {
   295  	// If the trie node was not requested or it's already processed, bail out
   296  	req := s.nodeReqs[result.Path]
   297  	if req == nil {
   298  		return ErrNotRequested
   299  	}
   300  	if req.data != nil {
   301  		return ErrAlreadyProcessed
   302  	}
   303  	// Decode the node data content and update the request
   304  	node, err := decodeNode(req.hash.Bytes(), result.Data)
   305  	if err != nil {
   306  		return err
   307  	}
   308  	req.data = result.Data
   309  
   310  	// Create and schedule a request for all the children nodes
   311  	requests, err := s.children(req, node)
   312  	if err != nil {
   313  		return err
   314  	}
   315  	if len(requests) == 0 && req.deps == 0 {
   316  		s.commitNodeRequest(req)
   317  	} else {
   318  		req.deps += len(requests)
   319  		for _, child := range requests {
   320  			s.scheduleNodeRequest(child)
   321  		}
   322  	}
   323  	return nil
   324  }
   325  
   326  // Commit flushes the data stored in the internal membatch out to persistent
   327  // storage, returning any occurred error.
   328  func (s *Sync) Commit(dbw ethdb.Batch) error {
   329  	// Dump the membatch into a database dbw
   330  	for path, value := range s.membatch.nodes {
   331  		rawdb.WriteTrieNode(dbw, s.membatch.hashes[path], value)
   332  	}
   333  	for hash, value := range s.membatch.codes {
   334  		rawdb.WriteCode(dbw, hash, value)
   335  	}
   336  	// Drop the membatch data and return
   337  	s.membatch = newSyncMemBatch()
   338  	return nil
   339  }
   340  
   341  // Pending returns the number of state entries currently pending for download.
   342  func (s *Sync) Pending() int {
   343  	return len(s.nodeReqs) + len(s.codeReqs)
   344  }
   345  
   346  // schedule inserts a new state retrieval request into the fetch queue. If there
   347  // is already a pending request for this node, the new request will be discarded
   348  // and only a parent reference added to the old one.
   349  func (s *Sync) scheduleNodeRequest(req *nodeRequest) {
   350  	s.nodeReqs[string(req.path)] = req
   351  
   352  	// Schedule the request for future retrieval. This queue is shared
   353  	// by both node requests and code requests.
   354  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   355  	for i := 0; i < 14 && i < len(req.path); i++ {
   356  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   357  	}
   358  	s.queue.Push(string(req.path), prio)
   359  }
   360  
   361  // schedule inserts a new state retrieval request into the fetch queue. If there
   362  // is already a pending request for this node, the new request will be discarded
   363  // and only a parent reference added to the old one.
   364  func (s *Sync) scheduleCodeRequest(req *codeRequest) {
   365  	// If we're already requesting this node, add a new reference and stop
   366  	if old, ok := s.codeReqs[req.hash]; ok {
   367  		old.parents = append(old.parents, req.parents...)
   368  		return
   369  	}
   370  	s.codeReqs[req.hash] = req
   371  
   372  	// Schedule the request for future retrieval. This queue is shared
   373  	// by both node requests and code requests.
   374  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   375  	for i := 0; i < 14 && i < len(req.path); i++ {
   376  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   377  	}
   378  	s.queue.Push(req.hash, prio)
   379  }
   380  
   381  // children retrieves all the missing children of a state trie entry for future
   382  // retrieval scheduling.
   383  func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
   384  	// Gather all the children of the node, irrelevant whether known or not
   385  	type childNode struct {
   386  		path []byte
   387  		node node
   388  	}
   389  	var children []childNode
   390  
   391  	switch node := (object).(type) {
   392  	case *shortNode:
   393  		key := node.Key
   394  		if hasTerm(key) {
   395  			key = key[:len(key)-1]
   396  		}
   397  		children = []childNode{{
   398  			node: node.Val,
   399  			path: append(append([]byte(nil), req.path...), key...),
   400  		}}
   401  	case *fullNode:
   402  		for i := 0; i < 17; i++ {
   403  			if node.Children[i] != nil {
   404  				children = append(children, childNode{
   405  					node: node.Children[i],
   406  					path: append(append([]byte(nil), req.path...), byte(i)),
   407  				})
   408  			}
   409  		}
   410  	default:
   411  		panic(fmt.Sprintf("unknown node: %+v", node))
   412  	}
   413  	// Iterate over the children, and request all unknown ones
   414  	var (
   415  		missing = make(chan *nodeRequest, len(children))
   416  		pending sync.WaitGroup
   417  	)
   418  	for _, child := range children {
   419  		// Notify any external watcher of a new key/value node
   420  		if req.callback != nil {
   421  			if node, ok := (child.node).(valueNode); ok {
   422  				var paths [][]byte
   423  				if len(child.path) == 2*common.HashLength {
   424  					paths = append(paths, hexToKeybytes(child.path))
   425  				} else if len(child.path) == 4*common.HashLength {
   426  					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
   427  					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
   428  				}
   429  				if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil {
   430  					return nil, err
   431  				}
   432  			}
   433  		}
   434  		// If the child references another node, resolve or schedule
   435  		if node, ok := (child.node).(hashNode); ok {
   436  			// Try to resolve the node from the local database
   437  			if s.membatch.hasNode(child.path) {
   438  				continue
   439  			}
   440  			// Check the presence of children concurrently
   441  			pending.Add(1)
   442  			go func(child childNode) {
   443  				defer pending.Done()
   444  
   445  				// If database says duplicate, then at least the trie node is present
   446  				// and we hold the assumption that it's NOT legacy contract code.
   447  				chash := common.BytesToHash(node)
   448  				if rawdb.HasTrieNode(s.database, chash) {
   449  					return
   450  				}
   451  				// Locally unknown node, schedule for retrieval
   452  				missing <- &nodeRequest{
   453  					path:     child.path,
   454  					hash:     chash,
   455  					parent:   req,
   456  					callback: req.callback,
   457  				}
   458  			}(child)
   459  		}
   460  	}
   461  	pending.Wait()
   462  
   463  	requests := make([]*nodeRequest, 0, len(children))
   464  	for done := false; !done; {
   465  		select {
   466  		case miss := <-missing:
   467  			requests = append(requests, miss)
   468  		default:
   469  			done = true
   470  		}
   471  	}
   472  	return requests, nil
   473  }
   474  
   475  // commit finalizes a retrieval request and stores it into the membatch. If any
   476  // of the referencing parent requests complete due to this commit, they are also
   477  // committed themselves.
   478  func (s *Sync) commitNodeRequest(req *nodeRequest) error {
   479  	// Write the node content to the membatch
   480  	s.membatch.nodes[string(req.path)] = req.data
   481  	s.membatch.hashes[string(req.path)] = req.hash
   482  
   483  	delete(s.nodeReqs, string(req.path))
   484  	s.fetches[len(req.path)]--
   485  
   486  	// Check parent for completion
   487  	if req.parent != nil {
   488  		req.parent.deps--
   489  		if req.parent.deps == 0 {
   490  			if err := s.commitNodeRequest(req.parent); err != nil {
   491  				return err
   492  			}
   493  		}
   494  	}
   495  	return nil
   496  }
   497  
   498  // commit finalizes a retrieval request and stores it into the membatch. If any
   499  // of the referencing parent requests complete due to this commit, they are also
   500  // committed themselves.
   501  func (s *Sync) commitCodeRequest(req *codeRequest) error {
   502  	// Write the node content to the membatch
   503  	s.membatch.codes[req.hash] = req.data
   504  	delete(s.codeReqs, req.hash)
   505  	s.fetches[len(req.path)]--
   506  
   507  	// Check all parents for completion
   508  	for _, parent := range req.parents {
   509  		parent.deps--
   510  		if parent.deps == 0 {
   511  			if err := s.commitNodeRequest(parent); err != nil {
   512  				return err
   513  			}
   514  		}
   515  	}
   516  	return nil
   517  }