github.com/jimmyx0x/go-ethereum@v1.10.28/trie/sync.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"sync"
    23  
    24  	"github.com/ethereum/go-ethereum/common"
    25  	"github.com/ethereum/go-ethereum/common/prque"
    26  	"github.com/ethereum/go-ethereum/core/rawdb"
    27  	"github.com/ethereum/go-ethereum/ethdb"
    28  	"github.com/ethereum/go-ethereum/log"
    29  )
    30  
    31  // ErrNotRequested is returned by the trie sync when it's requested to process a
    32  // node it did not request.
    33  var ErrNotRequested = errors.New("not requested")
    34  
    35  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    36  // node it already processed previously.
    37  var ErrAlreadyProcessed = errors.New("already processed")
    38  
    39  // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
    40  // role of this value is to limit the number of trie nodes that get expanded in
    41  // memory if the node was configured with a significant number of peers.
    42  const maxFetchesPerDepth = 16384
    43  
// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
// Content-wise the tuple has either 1 element if it addresses a node in a single
// trie, or 2 elements if it addresses a node in a stacked trie.
//
// To support addressing arbitrary trie nodes, the path needs to support odd
// nibble lengths. To avoid transferring the expanded hex form over the network,
// the last part of the tuple (which needs to index into the middle of a trie) is
// compact encoded. In case of a 2-tuple, the first item is always 32 bytes, so
// that one is simply binary encoded.
//
// Examples:
//   - Path 0x9  -> {0x19}
//   - Path 0x99 -> {0x0099}
//   - Path 0x01234567890123456789012345678901012345678901234567890123456789019  -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
//   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
type SyncPath [][]byte
    62  
    63  // NewSyncPath converts an expanded trie path from nibble form into a compact
    64  // version that can be sent over the network.
    65  func NewSyncPath(path []byte) SyncPath {
    66  	// If the hash is from the account trie, append a single item, if it
    67  	// is from a storage trie, append a tuple. Note, the length 64 is
    68  	// clashing between account leaf and storage root. It's fine though
    69  	// because having a trie node at 64 depth means a hash collision was
    70  	// found and we're long dead.
    71  	if len(path) < 64 {
    72  		return SyncPath{hexToCompact(path)}
    73  	}
    74  	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
    75  }
    76  
// LeafCallback is a callback type invoked when a trie operation reaches a leaf
// node.
//
// The keys argument is a path tuple identifying a particular trie node either in
// a single trie (account) or a layered trie (account -> storage). Each key in
// the tuple is in the raw format (32 bytes).
//
// The path is a composite hexary path identifying the trie node. All the key
// bytes are converted to hexary nibbles and combined with the parent path if the
// trie node is in a layered trie.
//
// It's used by state sync and commit to allow handling external references
// between account and storage tries. It's also used by state healing for
// extracting the raw states (leaf nodes) with their corresponding paths.
type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error
    92  
// nodeRequest represents a scheduled or already in-flight trie node retrieval
// request. Requests are tracked by their path and form a tree via the parent
// pointer, so that a node is only committed once everything it depends on is.
type nodeRequest struct {
	hash common.Hash // Hash of the trie node to retrieve
	path []byte      // Merkle path leading to this node, used as tracking key and for prioritization
	data []byte      // Data content of the node, cached until all subtrees complete (nil until delivered)

	parent   *nodeRequest // Parent state node referencing this entry
	deps     int          // Number of dependencies before allowed to commit this node
	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}
   103  
// codeRequest represents a scheduled or already in-flight bytecode retrieval
// request. Code requests are tracked by hash, so one request can be referenced
// by several parent trie nodes.
type codeRequest struct {
	hash    common.Hash    // Hash of the contract bytecode to retrieve
	path    []byte         // Merkle path leading to the referencing node, used for prioritization
	data    []byte         // Retrieved bytecode (nil until delivered)
	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
}
   111  
// NodeSyncResult is a response carrying a requested trie node along with the
// path it was requested under.
type NodeSyncResult struct {
	Path string // Path of the originally unknown trie node
	Data []byte // Data content of the retrieved trie node
}
   117  
// CodeSyncResult is a response carrying a requested bytecode along with its
// hash.
type CodeSyncResult struct {
	Hash common.Hash // Hash of the originally unknown bytecode
	Data []byte      // Data content of the retrieved bytecode
}
   123  
// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items. Trie nodes are keyed by their path, contract codes by
// their hash.
type syncMemBatch struct {
	nodes  map[string][]byte      // In-memory membatch of recently completed nodes, keyed by node path
	hashes map[string]common.Hash // Hashes of recently completed nodes, keyed by node path
	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes, keyed by code hash
	size   uint64                 // Estimated batch-size of in-memory data.
}
   132  
   133  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
   134  func newSyncMemBatch() *syncMemBatch {
   135  	return &syncMemBatch{
   136  		nodes:  make(map[string][]byte),
   137  		hashes: make(map[string]common.Hash),
   138  		codes:  make(map[common.Hash][]byte),
   139  	}
   140  }
   141  
   142  // hasNode reports the trie node with specific path is already cached.
   143  func (batch *syncMemBatch) hasNode(path []byte) bool {
   144  	_, ok := batch.nodes[string(path)]
   145  	return ok
   146  }
   147  
   148  // hasCode reports the contract code with specific hash is already cached.
   149  func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
   150  	_, ok := batch.codes[hash]
   151  	return ok
   152  }
   153  
// Sync is the main state trie synchronisation scheduler, which provides yet
// unknown trie hashes to retrieve, accepts node data associated with said hashes
// and reconstructs the trie step by step until all is done.
//
// Trie node requests are tracked by node path, contract code requests by code
// hash; both kinds share a single priority queue.
type Sync struct {
	scheme   NodeScheme                   // Node scheme descriptor used in database.
	database ethdb.KeyValueReader         // Persistent database to check for existing entries
	membatch *syncMemBatch                // Memory buffer to avoid frequent database writes
	nodeReqs map[string]*nodeRequest      // Pending requests pertaining to a trie node path
	codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash
	queue    *prque.Prque                 // Priority queue with the pending requests
	fetches  map[int]int                  // Number of active fetches per trie node depth (path length)
}
   166  
   167  // NewSync creates a new trie data download scheduler.
   168  func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, scheme NodeScheme) *Sync {
   169  	ts := &Sync{
   170  		scheme:   scheme,
   171  		database: database,
   172  		membatch: newSyncMemBatch(),
   173  		nodeReqs: make(map[string]*nodeRequest),
   174  		codeReqs: make(map[common.Hash]*codeRequest),
   175  		queue:    prque.New(nil),
   176  		fetches:  make(map[int]int),
   177  	}
   178  	ts.AddSubTrie(root, nil, common.Hash{}, nil, callback)
   179  	return ts
   180  }
   181  
   182  // AddSubTrie registers a new trie to the sync code, rooted at the designated
   183  // parent for completion tracking. The given path is a unique node path in
   184  // hex format and contain all the parent path if it's layered trie node.
   185  func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) {
   186  	// Short circuit if the trie is empty or already known
   187  	if root == emptyRoot {
   188  		return
   189  	}
   190  	if s.membatch.hasNode(path) {
   191  		return
   192  	}
   193  	owner, inner := ResolvePath(path)
   194  	if s.scheme.HasTrieNode(s.database, owner, inner, root) {
   195  		return
   196  	}
   197  	// Assemble the new sub-trie sync request
   198  	req := &nodeRequest{
   199  		hash:     root,
   200  		path:     path,
   201  		callback: callback,
   202  	}
   203  	// If this sub-trie has a designated parent, link them together
   204  	if parent != (common.Hash{}) {
   205  		ancestor := s.nodeReqs[string(parentPath)]
   206  		if ancestor == nil {
   207  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   208  		}
   209  		ancestor.deps++
   210  		req.parent = ancestor
   211  	}
   212  	s.scheduleNodeRequest(req)
   213  }
   214  
   215  // AddCodeEntry schedules the direct retrieval of a contract code that should not
   216  // be interpreted as a trie node, but rather accepted and stored into the database
   217  // as is.
   218  func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) {
   219  	// Short circuit if the entry is empty or already known
   220  	if hash == emptyState {
   221  		return
   222  	}
   223  	if s.membatch.hasCode(hash) {
   224  		return
   225  	}
   226  	// If database says duplicate, the blob is present for sure.
   227  	// Note we only check the existence with new code scheme, snap
   228  	// sync is expected to run with a fresh new node. Even there
   229  	// exists the code with legacy format, fetch and store with
   230  	// new scheme anyway.
   231  	if rawdb.HasCodeWithPrefix(s.database, hash) {
   232  		return
   233  	}
   234  	// Assemble the new sub-trie sync request
   235  	req := &codeRequest{
   236  		path: path,
   237  		hash: hash,
   238  	}
   239  	// If this sub-trie has a designated parent, link them together
   240  	if parent != (common.Hash{}) {
   241  		ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq
   242  		if ancestor == nil {
   243  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   244  		}
   245  		ancestor.deps++
   246  		req.parents = append(req.parents, ancestor)
   247  	}
   248  	s.scheduleCodeRequest(req)
   249  }
   250  
   251  // Missing retrieves the known missing nodes from the trie for retrieval. To aid
   252  // both eth/6x style fast sync and snap/1x style state sync, the paths of trie
   253  // nodes are returned too, as well as separate hash list for codes.
   254  func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) {
   255  	var (
   256  		nodePaths  []string
   257  		nodeHashes []common.Hash
   258  		codeHashes []common.Hash
   259  	)
   260  	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
   261  		// Retrieve the next item in line
   262  		item, prio := s.queue.Peek()
   263  
   264  		// If we have too many already-pending tasks for this depth, throttle
   265  		depth := int(prio >> 56)
   266  		if s.fetches[depth] > maxFetchesPerDepth {
   267  			break
   268  		}
   269  		// Item is allowed to be scheduled, add it to the task list
   270  		s.queue.Pop()
   271  		s.fetches[depth]++
   272  
   273  		switch item := item.(type) {
   274  		case common.Hash:
   275  			codeHashes = append(codeHashes, item)
   276  		case string:
   277  			req, ok := s.nodeReqs[item]
   278  			if !ok {
   279  				log.Error("Missing node request", "path", item)
   280  				continue // System very wrong, shouldn't happen
   281  			}
   282  			nodePaths = append(nodePaths, item)
   283  			nodeHashes = append(nodeHashes, req.hash)
   284  		}
   285  	}
   286  	return nodePaths, nodeHashes, codeHashes
   287  }
   288  
   289  // ProcessCode injects the received data for requested item. Note it can
   290  // happpen that the single response commits two pending requests(e.g.
   291  // there are two requests one for code and one for node but the hash
   292  // is same). In this case the second response for the same hash will
   293  // be treated as "non-requested" item or "already-processed" item but
   294  // there is no downside.
   295  func (s *Sync) ProcessCode(result CodeSyncResult) error {
   296  	// If the code was not requested or it's already processed, bail out
   297  	req := s.codeReqs[result.Hash]
   298  	if req == nil {
   299  		return ErrNotRequested
   300  	}
   301  	if req.data != nil {
   302  		return ErrAlreadyProcessed
   303  	}
   304  	req.data = result.Data
   305  	return s.commitCodeRequest(req)
   306  }
   307  
   308  // ProcessNode injects the received data for requested item. Note it can
   309  // happen that the single response commits two pending requests(e.g.
   310  // there are two requests one for code and one for node but the hash
   311  // is same). In this case the second response for the same hash will
   312  // be treated as "non-requested" item or "already-processed" item but
   313  // there is no downside.
   314  func (s *Sync) ProcessNode(result NodeSyncResult) error {
   315  	// If the trie node was not requested or it's already processed, bail out
   316  	req := s.nodeReqs[result.Path]
   317  	if req == nil {
   318  		return ErrNotRequested
   319  	}
   320  	if req.data != nil {
   321  		return ErrAlreadyProcessed
   322  	}
   323  	// Decode the node data content and update the request
   324  	node, err := decodeNode(req.hash.Bytes(), result.Data)
   325  	if err != nil {
   326  		return err
   327  	}
   328  	req.data = result.Data
   329  
   330  	// Create and schedule a request for all the children nodes
   331  	requests, err := s.children(req, node)
   332  	if err != nil {
   333  		return err
   334  	}
   335  	if len(requests) == 0 && req.deps == 0 {
   336  		s.commitNodeRequest(req)
   337  	} else {
   338  		req.deps += len(requests)
   339  		for _, child := range requests {
   340  			s.scheduleNodeRequest(child)
   341  		}
   342  	}
   343  	return nil
   344  }
   345  
   346  // Commit flushes the data stored in the internal membatch out to persistent
   347  // storage, returning any occurred error.
   348  func (s *Sync) Commit(dbw ethdb.Batch) error {
   349  	// Dump the membatch into a database dbw
   350  	for path, value := range s.membatch.nodes {
   351  		owner, inner := ResolvePath([]byte(path))
   352  		s.scheme.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value)
   353  	}
   354  	for hash, value := range s.membatch.codes {
   355  		rawdb.WriteCode(dbw, hash, value)
   356  	}
   357  	// Drop the membatch data and return
   358  	s.membatch = newSyncMemBatch()
   359  	return nil
   360  }
   361  
   362  // MemSize returns an estimated size (in bytes) of the data held in the membatch.
   363  func (s *Sync) MemSize() uint64 {
   364  	return s.membatch.size
   365  }
   366  
   367  // Pending returns the number of state entries currently pending for download.
   368  func (s *Sync) Pending() int {
   369  	return len(s.nodeReqs) + len(s.codeReqs)
   370  }
   371  
   372  // schedule inserts a new state retrieval request into the fetch queue. If there
   373  // is already a pending request for this node, the new request will be discarded
   374  // and only a parent reference added to the old one.
   375  func (s *Sync) scheduleNodeRequest(req *nodeRequest) {
   376  	s.nodeReqs[string(req.path)] = req
   377  
   378  	// Schedule the request for future retrieval. This queue is shared
   379  	// by both node requests and code requests.
   380  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   381  	for i := 0; i < 14 && i < len(req.path); i++ {
   382  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   383  	}
   384  	s.queue.Push(string(req.path), prio)
   385  }
   386  
   387  // schedule inserts a new state retrieval request into the fetch queue. If there
   388  // is already a pending request for this node, the new request will be discarded
   389  // and only a parent reference added to the old one.
   390  func (s *Sync) scheduleCodeRequest(req *codeRequest) {
   391  	// If we're already requesting this node, add a new reference and stop
   392  	if old, ok := s.codeReqs[req.hash]; ok {
   393  		old.parents = append(old.parents, req.parents...)
   394  		return
   395  	}
   396  	s.codeReqs[req.hash] = req
   397  
   398  	// Schedule the request for future retrieval. This queue is shared
   399  	// by both node requests and code requests.
   400  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   401  	for i := 0; i < 14 && i < len(req.path); i++ {
   402  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   403  	}
   404  	s.queue.Push(req.hash, prio)
   405  }
   406  
   407  // children retrieves all the missing children of a state trie entry for future
   408  // retrieval scheduling.
   409  func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
   410  	// Gather all the children of the node, irrelevant whether known or not
   411  	type childNode struct {
   412  		path []byte
   413  		node node
   414  	}
   415  	var children []childNode
   416  
   417  	switch node := (object).(type) {
   418  	case *shortNode:
   419  		key := node.Key
   420  		if hasTerm(key) {
   421  			key = key[:len(key)-1]
   422  		}
   423  		children = []childNode{{
   424  			node: node.Val,
   425  			path: append(append([]byte(nil), req.path...), key...),
   426  		}}
   427  	case *fullNode:
   428  		for i := 0; i < 17; i++ {
   429  			if node.Children[i] != nil {
   430  				children = append(children, childNode{
   431  					node: node.Children[i],
   432  					path: append(append([]byte(nil), req.path...), byte(i)),
   433  				})
   434  			}
   435  		}
   436  	default:
   437  		panic(fmt.Sprintf("unknown node: %+v", node))
   438  	}
   439  	// Iterate over the children, and request all unknown ones
   440  	var (
   441  		missing = make(chan *nodeRequest, len(children))
   442  		pending sync.WaitGroup
   443  	)
   444  	for _, child := range children {
   445  		// Notify any external watcher of a new key/value node
   446  		if req.callback != nil {
   447  			if node, ok := (child.node).(valueNode); ok {
   448  				var paths [][]byte
   449  				if len(child.path) == 2*common.HashLength {
   450  					paths = append(paths, hexToKeybytes(child.path))
   451  				} else if len(child.path) == 4*common.HashLength {
   452  					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
   453  					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
   454  				}
   455  				if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil {
   456  					return nil, err
   457  				}
   458  			}
   459  		}
   460  		// If the child references another node, resolve or schedule
   461  		if node, ok := (child.node).(hashNode); ok {
   462  			// Try to resolve the node from the local database
   463  			if s.membatch.hasNode(child.path) {
   464  				continue
   465  			}
   466  			// Check the presence of children concurrently
   467  			pending.Add(1)
   468  			go func(child childNode) {
   469  				defer pending.Done()
   470  
   471  				// If database says duplicate, then at least the trie node is present
   472  				// and we hold the assumption that it's NOT legacy contract code.
   473  				var (
   474  					chash        = common.BytesToHash(node)
   475  					owner, inner = ResolvePath(child.path)
   476  				)
   477  				if s.scheme.HasTrieNode(s.database, owner, inner, chash) {
   478  					return
   479  				}
   480  				// Locally unknown node, schedule for retrieval
   481  				missing <- &nodeRequest{
   482  					path:     child.path,
   483  					hash:     chash,
   484  					parent:   req,
   485  					callback: req.callback,
   486  				}
   487  			}(child)
   488  		}
   489  	}
   490  	pending.Wait()
   491  
   492  	requests := make([]*nodeRequest, 0, len(children))
   493  	for done := false; !done; {
   494  		select {
   495  		case miss := <-missing:
   496  			requests = append(requests, miss)
   497  		default:
   498  			done = true
   499  		}
   500  	}
   501  	return requests, nil
   502  }
   503  
   504  // commit finalizes a retrieval request and stores it into the membatch. If any
   505  // of the referencing parent requests complete due to this commit, they are also
   506  // committed themselves.
   507  func (s *Sync) commitNodeRequest(req *nodeRequest) error {
   508  	// Write the node content to the membatch
   509  	s.membatch.nodes[string(req.path)] = req.data
   510  	s.membatch.hashes[string(req.path)] = req.hash
   511  	// The size tracking refers to the db-batch, not the in-memory data.
   512  	// Therefore, we ignore the req.path, and account only for the hash+data
   513  	// which eventually is written to db.
   514  	s.membatch.size += common.HashLength + uint64(len(req.data))
   515  	delete(s.nodeReqs, string(req.path))
   516  	s.fetches[len(req.path)]--
   517  
   518  	// Check parent for completion
   519  	if req.parent != nil {
   520  		req.parent.deps--
   521  		if req.parent.deps == 0 {
   522  			if err := s.commitNodeRequest(req.parent); err != nil {
   523  				return err
   524  			}
   525  		}
   526  	}
   527  	return nil
   528  }
   529  
   530  // commit finalizes a retrieval request and stores it into the membatch. If any
   531  // of the referencing parent requests complete due to this commit, they are also
   532  // committed themselves.
   533  func (s *Sync) commitCodeRequest(req *codeRequest) error {
   534  	// Write the node content to the membatch
   535  	s.membatch.codes[req.hash] = req.data
   536  	s.membatch.size += common.HashLength + uint64(len(req.data))
   537  	delete(s.codeReqs, req.hash)
   538  	s.fetches[len(req.path)]--
   539  
   540  	// Check all parents for completion
   541  	for _, parent := range req.parents {
   542  		parent.deps--
   543  		if parent.deps == 0 {
   544  			if err := s.commitNodeRequest(parent); err != nil {
   545  				return err
   546  			}
   547  		}
   548  	}
   549  	return nil
   550  }
   551  
   552  // ResolvePath resolves the provided composite node path by separating the
   553  // path in account trie if it's existent.
   554  func ResolvePath(path []byte) (common.Hash, []byte) {
   555  	var owner common.Hash
   556  	if len(path) >= 2*common.HashLength {
   557  		owner = common.BytesToHash(hexToKeybytes(path[:2*common.HashLength]))
   558  		path = path[2*common.HashLength:]
   559  	}
   560  	return owner, path
   561  }