github.com/Embreum/go-ethereum@v1.9.6/trie/sync.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  
    23  	"github.com/ethereum/go-ethereum/common"
    24  	"github.com/ethereum/go-ethereum/common/prque"
    25  	"github.com/ethereum/go-ethereum/ethdb"
    26  )
    27  
    28  // ErrNotRequested is returned by the trie sync when it's requested to process a
    29  // node it did not request.
    30  var ErrNotRequested = errors.New("not requested")
    31  
    32  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    33  // node it already processed previously.
    34  var ErrAlreadyProcessed = errors.New("already processed")
    35  
    36  // request represents a scheduled or already in-flight state retrieval request.
    37  type request struct {
    38  	hash common.Hash // Hash of the node data content to retrieve
    39  	data []byte      // Data content of the node, cached until all subtrees complete
    40  	raw  bool        // Whether this is a raw entry (code) or a trie node
    41  
    42  	parents []*request // Parent state nodes referencing this entry (notify all upon completion)
    43  	depth   int        // Depth level within the trie the node is located to prioritise DFS
    44  	deps    int        // Number of dependencies before allowed to commit this node
    45  
    46  	callback LeafCallback // Callback to invoke if a leaf node it reached on this branch
    47  }
    48  
    49  // SyncResult is a simple list to return missing nodes along with their request
    50  // hashes.
    51  type SyncResult struct {
    52  	Hash common.Hash // Hash of the originally unknown trie node
    53  	Data []byte      // Data content of the retrieved node
    54  }
    55  
    56  // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
    57  // persisted data items.
    58  type syncMemBatch struct {
    59  	batch map[common.Hash][]byte // In-memory membatch of recently completed items
    60  	order []common.Hash          // Order of completion to prevent out-of-order data loss
    61  }
    62  
    63  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
    64  func newSyncMemBatch() *syncMemBatch {
    65  	return &syncMemBatch{
    66  		batch: make(map[common.Hash][]byte),
    67  		order: make([]common.Hash, 0, 256),
    68  	}
    69  }
    70  
    71  // Sync is the main state trie synchronisation scheduler, which provides yet
    72  // unknown trie hashes to retrieve, accepts node data associated with said hashes
    73  // and reconstructs the trie step by step until all is done.
    74  type Sync struct {
    75  	database ethdb.KeyValueReader     // Persistent database to check for existing entries
    76  	membatch *syncMemBatch            // Memory buffer to avoid frequent database writes
    77  	requests map[common.Hash]*request // Pending requests pertaining to a key hash
    78  	queue    *prque.Prque             // Priority queue with the pending requests
    79  	bloom    *SyncBloom               // Bloom filter for fast node existence checks
    80  }
    81  
    82  // NewSync creates a new trie data download scheduler.
    83  func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback, bloom *SyncBloom) *Sync {
    84  	ts := &Sync{
    85  		database: database,
    86  		membatch: newSyncMemBatch(),
    87  		requests: make(map[common.Hash]*request),
    88  		queue:    prque.New(nil),
    89  		bloom:    bloom,
    90  	}
    91  	ts.AddSubTrie(root, 0, common.Hash{}, callback)
    92  	return ts
    93  }
    94  
    95  // AddSubTrie registers a new trie to the sync code, rooted at the designated parent.
    96  func (s *Sync) AddSubTrie(root common.Hash, depth int, parent common.Hash, callback LeafCallback) {
    97  	// Short circuit if the trie is empty or already known
    98  	if root == emptyRoot {
    99  		return
   100  	}
   101  	if _, ok := s.membatch.batch[root]; ok {
   102  		return
   103  	}
   104  	if s.bloom.Contains(root[:]) {
   105  		// Bloom filter says this might be a duplicate, double check
   106  		blob, _ := s.database.Get(root[:])
   107  		if local, err := decodeNode(root[:], blob); local != nil && err == nil {
   108  			return
   109  		}
   110  		// False positive, bump fault meter
   111  		bloomFaultMeter.Mark(1)
   112  	}
   113  	// Assemble the new sub-trie sync request
   114  	req := &request{
   115  		hash:     root,
   116  		depth:    depth,
   117  		callback: callback,
   118  	}
   119  	// If this sub-trie has a designated parent, link them together
   120  	if parent != (common.Hash{}) {
   121  		ancestor := s.requests[parent]
   122  		if ancestor == nil {
   123  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   124  		}
   125  		ancestor.deps++
   126  		req.parents = append(req.parents, ancestor)
   127  	}
   128  	s.schedule(req)
   129  }
   130  
   131  // AddRawEntry schedules the direct retrieval of a state entry that should not be
   132  // interpreted as a trie node, but rather accepted and stored into the database
   133  // as is. This method's goal is to support misc state metadata retrievals (e.g.
   134  // contract code).
   135  func (s *Sync) AddRawEntry(hash common.Hash, depth int, parent common.Hash) {
   136  	// Short circuit if the entry is empty or already known
   137  	if hash == emptyState {
   138  		return
   139  	}
   140  	if _, ok := s.membatch.batch[hash]; ok {
   141  		return
   142  	}
   143  	if s.bloom.Contains(hash[:]) {
   144  		// Bloom filter says this might be a duplicate, double check
   145  		if ok, _ := s.database.Has(hash[:]); ok {
   146  			return
   147  		}
   148  		// False positive, bump fault meter
   149  		bloomFaultMeter.Mark(1)
   150  	}
   151  	// Assemble the new sub-trie sync request
   152  	req := &request{
   153  		hash:  hash,
   154  		raw:   true,
   155  		depth: depth,
   156  	}
   157  	// If this sub-trie has a designated parent, link them together
   158  	if parent != (common.Hash{}) {
   159  		ancestor := s.requests[parent]
   160  		if ancestor == nil {
   161  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   162  		}
   163  		ancestor.deps++
   164  		req.parents = append(req.parents, ancestor)
   165  	}
   166  	s.schedule(req)
   167  }
   168  
   169  // Missing retrieves the known missing nodes from the trie for retrieval.
   170  func (s *Sync) Missing(max int) []common.Hash {
   171  	var requests []common.Hash
   172  	for !s.queue.Empty() && (max == 0 || len(requests) < max) {
   173  		requests = append(requests, s.queue.PopItem().(common.Hash))
   174  	}
   175  	return requests
   176  }
   177  
   178  // Process injects a batch of retrieved trie nodes data, returning if something
   179  // was committed to the database and also the index of an entry if processing of
   180  // it failed.
   181  func (s *Sync) Process(results []SyncResult) (bool, int, error) {
   182  	committed := false
   183  
   184  	for i, item := range results {
   185  		// If the item was not requested, bail out
   186  		request := s.requests[item.Hash]
   187  		if request == nil {
   188  			return committed, i, ErrNotRequested
   189  		}
   190  		if request.data != nil {
   191  			return committed, i, ErrAlreadyProcessed
   192  		}
   193  		// If the item is a raw entry request, commit directly
   194  		if request.raw {
   195  			request.data = item.Data
   196  			s.commit(request)
   197  			committed = true
   198  			continue
   199  		}
   200  		// Decode the node data content and update the request
   201  		node, err := decodeNode(item.Hash[:], item.Data)
   202  		if err != nil {
   203  			return committed, i, err
   204  		}
   205  		request.data = item.Data
   206  
   207  		// Create and schedule a request for all the children nodes
   208  		requests, err := s.children(request, node)
   209  		if err != nil {
   210  			return committed, i, err
   211  		}
   212  		if len(requests) == 0 && request.deps == 0 {
   213  			s.commit(request)
   214  			committed = true
   215  			continue
   216  		}
   217  		request.deps += len(requests)
   218  		for _, child := range requests {
   219  			s.schedule(child)
   220  		}
   221  	}
   222  	return committed, 0, nil
   223  }
   224  
   225  // Commit flushes the data stored in the internal membatch out to persistent
   226  // storage, returning the number of items written and any occurred error.
   227  func (s *Sync) Commit(dbw ethdb.KeyValueWriter) (int, error) {
   228  	// Dump the membatch into a database dbw
   229  	for i, key := range s.membatch.order {
   230  		if err := dbw.Put(key[:], s.membatch.batch[key]); err != nil {
   231  			return i, err
   232  		}
   233  		s.bloom.Add(key[:])
   234  	}
   235  	written := len(s.membatch.order) // TODO(karalabe): could an order change improve write performance?
   236  
   237  	// Drop the membatch data and return
   238  	s.membatch = newSyncMemBatch()
   239  	return written, nil
   240  }
   241  
   242  // Pending returns the number of state entries currently pending for download.
   243  func (s *Sync) Pending() int {
   244  	return len(s.requests)
   245  }
   246  
   247  // schedule inserts a new state retrieval request into the fetch queue. If there
   248  // is already a pending request for this node, the new request will be discarded
   249  // and only a parent reference added to the old one.
   250  func (s *Sync) schedule(req *request) {
   251  	// If we're already requesting this node, add a new reference and stop
   252  	if old, ok := s.requests[req.hash]; ok {
   253  		old.parents = append(old.parents, req.parents...)
   254  		return
   255  	}
   256  	// Schedule the request for future retrieval
   257  	s.queue.Push(req.hash, int64(req.depth))
   258  	s.requests[req.hash] = req
   259  }
   260  
   261  // children retrieves all the missing children of a state trie entry for future
   262  // retrieval scheduling.
   263  func (s *Sync) children(req *request, object node) ([]*request, error) {
   264  	// Gather all the children of the node, irrelevant whether known or not
   265  	type child struct {
   266  		node  node
   267  		depth int
   268  	}
   269  	var children []child
   270  
   271  	switch node := (object).(type) {
   272  	case *shortNode:
   273  		children = []child{{
   274  			node:  node.Val,
   275  			depth: req.depth + len(node.Key),
   276  		}}
   277  	case *fullNode:
   278  		for i := 0; i < 17; i++ {
   279  			if node.Children[i] != nil {
   280  				children = append(children, child{
   281  					node:  node.Children[i],
   282  					depth: req.depth + 1,
   283  				})
   284  			}
   285  		}
   286  	default:
   287  		panic(fmt.Sprintf("unknown node: %+v", node))
   288  	}
   289  	// Iterate over the children, and request all unknown ones
   290  	requests := make([]*request, 0, len(children))
   291  	for _, child := range children {
   292  		// Notify any external watcher of a new key/value node
   293  		if req.callback != nil {
   294  			if node, ok := (child.node).(valueNode); ok {
   295  				if err := req.callback(node, req.hash); err != nil {
   296  					return nil, err
   297  				}
   298  			}
   299  		}
   300  		// If the child references another node, resolve or schedule
   301  		if node, ok := (child.node).(hashNode); ok {
   302  			// Try to resolve the node from the local database
   303  			hash := common.BytesToHash(node)
   304  			if _, ok := s.membatch.batch[hash]; ok {
   305  				continue
   306  			}
   307  			if s.bloom.Contains(node) {
   308  				// Bloom filter says this might be a duplicate, double check
   309  				if ok, _ := s.database.Has(node); ok {
   310  					continue
   311  				}
   312  				// False positive, bump fault meter
   313  				bloomFaultMeter.Mark(1)
   314  			}
   315  			// Locally unknown node, schedule for retrieval
   316  			requests = append(requests, &request{
   317  				hash:     hash,
   318  				parents:  []*request{req},
   319  				depth:    child.depth,
   320  				callback: req.callback,
   321  			})
   322  		}
   323  	}
   324  	return requests, nil
   325  }
   326  
   327  // commit finalizes a retrieval request and stores it into the membatch. If any
   328  // of the referencing parent requests complete due to this commit, they are also
   329  // committed themselves.
   330  func (s *Sync) commit(req *request) (err error) {
   331  	// Write the node content to the membatch
   332  	s.membatch.batch[req.hash] = req.data
   333  	s.membatch.order = append(s.membatch.order, req.hash)
   334  
   335  	delete(s.requests, req.hash)
   336  
   337  	// Check all parents for completion
   338  	for _, parent := range req.parents {
   339  		parent.deps--
   340  		if parent.deps == 0 {
   341  			if err := s.commit(parent); err != nil {
   342  				return err
   343  			}
   344  		}
   345  	}
   346  	return nil
   347  }