github.com/yuanzimu/bsc@v1.1.4/eth/protocols/snap/sync.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package snap
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"math/big"
    25  	"math/rand"
    26  	"sort"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/ethereum/go-ethereum/common"
    31  	"github.com/ethereum/go-ethereum/common/gopool"
    32  	"github.com/ethereum/go-ethereum/common/math"
    33  	"github.com/ethereum/go-ethereum/core/rawdb"
    34  	"github.com/ethereum/go-ethereum/core/state"
    35  	"github.com/ethereum/go-ethereum/core/state/snapshot"
    36  	"github.com/ethereum/go-ethereum/crypto"
    37  	"github.com/ethereum/go-ethereum/ethdb"
    38  	"github.com/ethereum/go-ethereum/event"
    39  	"github.com/ethereum/go-ethereum/light"
    40  	"github.com/ethereum/go-ethereum/log"
    41  	"github.com/ethereum/go-ethereum/rlp"
    42  	"github.com/ethereum/go-ethereum/trie"
    43  	"golang.org/x/crypto/sha3"
    44  )
    45  
    46  var (
    47  	// emptyRoot is the known root hash of an empty trie.
    48  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    49  
    50  	// emptyCode is the Keccak256 hash of the empty EVM bytecode (nil input).
    51  	emptyCode = crypto.Keccak256Hash(nil)
    52  )
    53  
    54  const (
    55  	// maxRequestSize is the maximum number of bytes to request from a remote peer.
    56  	maxRequestSize = 128 * 1024
    57  
    58  	// maxStorageSetRequestCount is the maximum number of contracts to request the
    59  	// storage of in a single query. If this number is too low, we're not filling
    60  	// responses fully and waste round trip times. If it's too high, we're capping
    61  	// responses and waste bandwidth.
    62  	maxStorageSetRequestCount = maxRequestSize / 1024 // evaluates to 128
    63  
    64  	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
    65  	// single query. If this number is too low, we're not filling responses fully
    66  	// and waste round trip times. If it's too high, we're capping responses and
    67  	// waste bandwidth.
    68  	//
    69  	// Deployed bytecodes are currently capped at 24KB, so the minimum request
    70  	// size should be maxRequestSize / 24K. Assuming that most contracts do not
    71  	// come close to that, requesting 4x should be a good approximation.
    72  	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 // evaluates to 20: the integer division truncates to 5 first
    73  
    74  	// maxTrieRequestCount is the maximum number of trie node blobs to request in
    75  	// a single query. If this number is too low, we're not filling responses fully
    76  	// and waste round trip times. If it's too high, we're capping responses and
    77  	// waste bandwidth.
    78  	maxTrieRequestCount = 256
    79  )
    80  
    81  var (
    82  	// accountConcurrency is the number of chunks to split the account trie into
    83  	// to allow concurrent retrievals.
    84  	accountConcurrency = 16
    85  
    86  	// storageConcurrency is the number of chunks to split a large contract's
    87  	// storage trie into to allow concurrent retrievals.
    88  	storageConcurrency = 16
    89  
    90  	// requestTimeout is the maximum time a peer is allowed to spend on serving
    91  	// a single network request.
    92  	requestTimeout = 15 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
    93  )
    94  
    95  // ErrCancelled is returned from Sync when the cancel channel fires before the
    96  // sync cycle has completed, i.e. the operation was prematurely terminated.
    97  var ErrCancelled = errors.New("sync cancelled")
    98  
    99  // accountRequest tracks a pending account range request so that responses can
   100  // be matched to actual requests and checked against any security constraints.
   101  //
   102  // Concurrency note: account requests and responses are handled concurrently from
   103  // the main runloop to allow Merkle proof verifications on the peer's thread and
   104  // to drop on invalid response. The request struct must contain all the data to
   105  // construct the response without accessing runloop internals (i.e. task). The
   106  // task pointer is only included so the runloop can match a response to the task
   107  // being synced without having yet another set of maps.
   108  type accountRequest struct {
   109  	peer string // Peer to which this request is assigned
   110  	id   uint64 // Request ID of this request
   111  
   112  	deliver chan *accountResponse // Channel to deliver successful response on
   113  	revert  chan *accountRequest  // Channel to deliver request failure on
   114  	cancel  chan struct{}         // Channel to track sync cancellation
   115  	timeout *time.Timer           // Timer to track delivery timeout
   116  	stale   chan struct{}         // Channel to signal the request was dropped
   117  
   118  	origin common.Hash // First account requested to allow continuation checks
   119  	limit  common.Hash // Last account requested to allow non-overlapping chunking
   120  
   121  	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
   122  }
   123  
   124  // accountResponse is an already Merkle-verified remote response to an account
   125  // range request. It carries the hashes and expanded accounts of the returned
   126  // range, plus a flag telling whether more accounts follow the returned range.
   127  type accountResponse struct {
   128  	task *accountTask // Task which this response is filling
   129  
   130  	hashes   []common.Hash    // Account hashes in the returned range
   131  	accounts []*state.Account // Expanded accounts in the returned range
   132  
   133  	cont bool // Whether the account range has a continuation
   134  }
   135  
   136  // bytecodeRequest tracks a pending bytecode request so that responses can be
   137  // matched to actual requests and checked against any security constraints.
   138  //
   139  // Concurrency note: bytecode requests and responses are handled concurrently from
   140  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   141  // to drop on invalid response. The request struct must contain all the data to
   142  // construct the response without accessing runloop internals (i.e. task). The
   143  // task pointer is only included so the runloop can match a response to the task
   144  // being synced without having yet another set of maps.
   145  type bytecodeRequest struct {
   146  	peer string // Peer to which this request is assigned
   147  	id   uint64 // Request ID of this request
   148  
   149  	deliver chan *bytecodeResponse // Channel to deliver successful response on
   150  	revert  chan *bytecodeRequest  // Channel to deliver request failure on
   151  	cancel  chan struct{}          // Channel to track sync cancellation
   152  	timeout *time.Timer            // Timer to track delivery timeout
   153  	stale   chan struct{}          // Channel to signal the request was dropped
   154  
   155  	hashes []common.Hash // Bytecode hashes to validate responses
   156  	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
   157  }
   158  
   159  // bytecodeResponse is an already verified remote response to a bytecode request.
   160  type bytecodeResponse struct {
   161  	task *accountTask // Task which this response is filling
   162  
   163  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   164  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   165  }
   166  
   167  // storageRequest tracks a pending storage ranges request so that responses can
   168  // be matched to actual requests and checked against any security constraints.
   169  //
   170  // Concurrency note: storage requests and responses are handled concurrently from
   171  // the main runloop to allow Merkle proof verifications on the peer's thread and
   172  // to drop on invalid response. The request struct must contain all the data to
   173  // construct the response without accessing runloop internals (i.e. tasks). The
   174  // task pointers are only included so the runloop can match a response to the
   175  // task being synced without having yet another set of maps.
   176  type storageRequest struct {
   177  	peer string // Peer to which this request is assigned
   178  	id   uint64 // Request ID of this request
   179  
   180  	deliver chan *storageResponse // Channel to deliver successful response on
   181  	revert  chan *storageRequest  // Channel to deliver request failure on
   182  	cancel  chan struct{}         // Channel to track sync cancellation
   183  	timeout *time.Timer           // Timer to track delivery timeout
   184  	stale   chan struct{}         // Channel to signal the request was dropped
   185  
   186  	accounts []common.Hash // Account hashes to validate responses
   187  	roots    []common.Hash // Storage roots to validate responses
   188  
   189  	origin common.Hash // First storage slot requested to allow continuation checks
   190  	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking
   191  
   192  	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
   193  	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
   194  }
   195  
   196  // storageResponse is an already Merkle-verified remote response to a storage
   197  // range request. It carries the requested accounts and roots together with the
   198  // slot hashes and values returned for them, plus a continuation flag.
   199  type storageResponse struct {
   200  	mainTask *accountTask // Task which this response belongs to
   201  	subTask  *storageTask // Task which this response is filling
   202  
   203  	accounts []common.Hash // Account hashes requested, may be only partially filled
   204  	roots    []common.Hash // Storage roots requested, may be only partially filled
   205  
   206  	hashes [][]common.Hash // Storage slot hashes in the returned range
   207  	slots  [][][]byte      // Storage slot values in the returned range
   208  
   209  	cont bool // Whether the last storage range has a continuation
   210  }
   211  
   212  // trienodeHealRequest tracks a pending state trie node request so that responses
   213  // can be matched to actual requests and checked against security constraints.
   214  //
   215  // Concurrency note: trie node requests and responses are handled concurrently from
   216  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   217  // to drop on invalid response. The request struct must contain all the data to
   218  // construct the response without accessing runloop internals (i.e. task). The
   219  // task pointer is only included so the runloop can match a response to the task
   220  // being synced without having yet another set of maps.
   221  type trienodeHealRequest struct {
   222  	peer string // Peer to which this request is assigned
   223  	id   uint64 // Request ID of this request
   224  
   225  	deliver chan *trienodeHealResponse // Channel to deliver successful response on
   226  	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
   227  	cancel  chan struct{}              // Channel to track sync cancellation
   228  	timeout *time.Timer                // Timer to track delivery timeout
   229  	stale   chan struct{}              // Channel to signal the request was dropped
   230  
   231  	hashes []common.Hash   // Trie node hashes to validate responses
   232  	paths  []trie.SyncPath // Trie node paths requested for rescheduling
   233  
   234  	task *healTask // Task which this request is filling (only access fields through the runloop!!)
   235  }
   236  
   237  // trienodeHealResponse is an already verified remote response to a trie node request.
   238  type trienodeHealResponse struct {
   239  	task *healTask // Task which this response is filling
   240  
   241  	hashes []common.Hash   // Hashes of the trie nodes to avoid double hashing
   242  	paths  []trie.SyncPath // Trie node paths requested for rescheduling missing ones
   243  	nodes  [][]byte        // Actual trie nodes to store into the database (nil = missing)
   244  }
   245  
   246  // bytecodeHealRequest tracks a pending bytecode request so that responses can be
   247  // matched to actual requests and checked against any security constraints.
   248  //
   249  // Concurrency note: bytecode requests and responses are handled concurrently from
   250  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   251  // to drop on invalid response. The request struct must contain all the data to
   252  // construct the response without accessing runloop internals (i.e. task). The
   253  // task pointer is only included so the runloop can match a response to the task
   254  // being synced without having yet another set of maps.
   255  type bytecodeHealRequest struct {
   256  	peer string // Peer to which this request is assigned
   257  	id   uint64 // Request ID of this request
   258  
   259  	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
   260  	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
   261  	cancel  chan struct{}              // Channel to track sync cancellation
   262  	timeout *time.Timer                // Timer to track delivery timeout
   263  	stale   chan struct{}              // Channel to signal the request was dropped
   264  
   265  	hashes []common.Hash // Bytecode hashes to validate responses
   266  	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
   267  }
   268  
   269  // bytecodeHealResponse is an already verified remote response to a bytecode request.
   270  type bytecodeHealResponse struct {
   271  	task *healTask // Task which this response is filling
   272  
   273  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   274  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   275  }
   276  
   277  // accountTask represents the sync task for a chunk of the account snapshot.
   278  type accountTask struct {
   279  	// These exported fields get serialized to leveldb on shutdown
   280  	Next     common.Hash                    // Next account to sync in this interval
   281  	Last     common.Hash                    // Last account to sync in this interval
   282  	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
   283  
   284  	// These fields are internals used during runtime
   285  	req  *accountRequest  // Pending request to fill this task
   286  	res  *accountResponse // Validated response filling this task
   287  	pend int              // Number of pending subtasks for this round
   288  
   289  	needCode  []bool // Flags whether the filling accounts need code retrieval
   290  	needState []bool // Flags whether the filling accounts need storage retrieval
   291  	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing
   292  
   293  	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
   294  	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
   295  
   296  	genBatch ethdb.Batch     // Batch used by the node generator
   297  	genTrie  *trie.StackTrie // Node generator from storage slots
   298  
   299  	done bool // Flag whether the task can be removed
   300  }
   301  
   302  // storageTask represents the sync task for a chunk of the storage snapshot.
   303  type storageTask struct {
   304  	Next common.Hash // Next storage slot to sync in this interval
   305  	Last common.Hash // Last storage slot to sync in this interval
   306  
   307  	// These fields are internals used during runtime
   308  	root common.Hash     // Storage root hash for this instance
   309  	req  *storageRequest // Pending request to fill this task
   310  
   311  	genBatch ethdb.Batch     // Batch used by the node generator
   312  	genTrie  *trie.StackTrie // Node generator from storage slots
   313  
   314  	done bool // Flag whether the task can be removed
   315  }
   316  
   317  // healTask represents the sync task for healing the snap-synced chunk boundaries.
   318  type healTask struct {
   319  	scheduler *trie.Sync // State trie sync scheduler defining the tasks
   320  
   321  	trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval
   322  	codeTasks map[common.Hash]struct{}      // Set of bytecode tasks currently queued for retrieval
   323  }
   324  
   325  // syncProgress is a database entry to allow suspending and resuming a snapshot state
   326  // sync. As opposed to full and fast sync, there is no way to restart a suspended
   327  // snap sync without prior knowledge of the suspension point.
   328  type syncProgress struct {
   329  	Tasks []*accountTask // The suspended account tasks (contract tasks within)
   330  
   331  	// Status report during syncing phase
   332  	AccountSynced  uint64             // Number of accounts downloaded
   333  	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   334  	BytecodeSynced uint64             // Number of bytecodes downloaded
   335  	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   336  	StorageSynced  uint64             // Number of storage slots downloaded
   337  	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   338  
   339  	// Status report during healing phase
   340  	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
   341  	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   342  	TrienodeHealDups   uint64             // Number of state trie nodes already processed
   343  	TrienodeHealNops   uint64             // Number of state trie nodes not requested
   344  	BytecodeHealSynced uint64             // Number of bytecodes downloaded
   345  	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   346  	BytecodeHealDups   uint64             // Number of bytecodes already processed
   347  	BytecodeHealNops   uint64             // Number of bytecodes not requested
   348  }
   349  
   350  // SyncPeer abstracts out the methods required for a peer to be synced against
   351  // with the goal of allowing the construction of mock peers without the
   352  // full-blown networking.
   353  type SyncPeer interface {
   354  	// ID retrieves the peer's unique identifier.
   355  	ID() string
   356  
   357  	// RequestAccountRange fetches a batch of accounts rooted in a specific account
   358  	// trie, starting with the origin.
   359  	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
   360  
   361  	// RequestStorageRanges fetches a batch of storage slots belonging to one or
   362  	// more accounts. If slots from only one account are requested, an origin marker
   363  	// may also be used to retrieve from there.
   364  	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
   365  
   366  	// RequestByteCodes fetches a batch of bytecodes by hash.
   367  	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
   368  
   369  	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
   370  	// a specific state trie.
   371  	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
   372  
   373  	// Log retrieves the peer's own contextual logger.
   374  	Log() log.Logger
   375  }
   376  
   377  // Syncer is an Ethereum account and storage trie syncer based on snapshots and
   378  // the snap protocol. Its purpose is to download all the accounts and storage
   379  // slots from remote peers and reassemble chunks of the state trie, on top of
   380  // which a state sync can be run to fix any gaps / overlaps.
   381  //
   382  // Every network request has a variety of failure events:
   383  //   - The peer disconnects after task assignment, failing to send the request
   384  //   - The peer disconnects after sending the request, before delivering on it
   385  //   - The peer remains connected, but does not deliver a response in time
   386  //   - The peer delivers a stale response after a previous timeout
   387  //   - The peer delivers a refusal to serve the requested state
   388  type Syncer struct {
   389  	db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup)
   390  
   391  	root    common.Hash    // Current state trie root being synced
   392  	tasks   []*accountTask // Current account task set being synced
   393  	snapped bool           // Flag to signal that snap phase is done
   394  	healer  *healTask      // Current state healing task being executed
   395  	update  chan struct{}  // Notification channel for possible sync progression
   396  
   397  	peers    map[string]SyncPeer // Currently active peers to download from
   398  	peerJoin *event.Feed         // Event feed to react to peers joining
   399  	peerDrop *event.Feed         // Event feed to react to peers dropping
   400  
   401  	// Request tracking during syncing phase
   402  	statelessPeers map[string]struct{} // Peers that failed to deliver state data (recreated on every Sync call)
   403  	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
   404  	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   405  	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests
   406  
   407  	accountReqs  map[uint64]*accountRequest  // Account requests currently running
   408  	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
   409  	storageReqs  map[uint64]*storageRequest  // Storage requests currently running
   410  
   411  	accountSynced  uint64             // Number of accounts downloaded
   412  	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   413  	bytecodeSynced uint64             // Number of bytecodes downloaded
   414  	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   415  	storageSynced  uint64             // Number of storage slots downloaded
   416  	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   417  
   418  	// Request tracking during healing phase
   419  	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
   420  	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   421  
   422  	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
   423  	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
   424  
   425  	trienodeHealSynced uint64             // Number of state trie nodes downloaded
   426  	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   427  	trienodeHealDups   uint64             // Number of state trie nodes already processed
   428  	trienodeHealNops   uint64             // Number of state trie nodes not requested
   429  	bytecodeHealSynced uint64             // Number of bytecodes downloaded
   430  	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   431  	bytecodeHealDups   uint64             // Number of bytecodes already processed
   432  	bytecodeHealNops   uint64             // Number of bytecodes not requested
   433  
   434  	stateWriter        ethdb.Batch        // Shared batch writer used for persisting raw states
   435  	accountHealed      uint64             // Number of accounts downloaded during the healing stage
   436  	accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage
   437  	storageHealed      uint64             // Number of storage slots downloaded during the healing stage
   438  	storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage
   439  
   440  	startTime time.Time // Time instance when snapshot sync started
   441  	logTime   time.Time // Time instance when status was last reported
   442  
   443  	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
   444  	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
   445  }
   446  
   447  // NewSyncer creates a new snapshot syncer to download the Ethereum state over the
   448  // snap protocol. statelessPeers is left nil here and is allocated by Sync.
   449  func NewSyncer(db ethdb.KeyValueStore) *Syncer {
   450  	return &Syncer{
   451  		db: db,
   452  
   453  		peers:    make(map[string]SyncPeer),
   454  		peerJoin: new(event.Feed),
   455  		peerDrop: new(event.Feed),
   456  		update:   make(chan struct{}, 1), // Buffered with room for a single pending notification
   457  
   458  		accountIdlers:  make(map[string]struct{}),
   459  		storageIdlers:  make(map[string]struct{}),
   460  		bytecodeIdlers: make(map[string]struct{}),
   461  
   462  		accountReqs:  make(map[uint64]*accountRequest),
   463  		storageReqs:  make(map[uint64]*storageRequest),
   464  		bytecodeReqs: make(map[uint64]*bytecodeRequest),
   465  
   466  		trienodeHealIdlers: make(map[string]struct{}),
   467  		bytecodeHealIdlers: make(map[string]struct{}),
   468  
   469  		trienodeHealReqs: make(map[uint64]*trienodeHealRequest),
   470  		bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest),
   471  		stateWriter:      db.NewBatch(), // Batch obtained from the same database the syncer writes to
   472  	}
   473  }
   474  
   475  // Register injects a new data source into the syncer's peerset, or errors if the id is already registered.
   476  func (s *Syncer) Register(peer SyncPeer) error {
   477  	// Make sure the peer is not registered yet
   478  	id := peer.ID()
   479  
   480  	s.lock.Lock()
   481  	if _, ok := s.peers[id]; ok {
   482  		log.Error("Snap peer already registered", "id", id)
   483  
   484  		s.lock.Unlock()
   485  		return errors.New("already registered")
   486  	}
   487  	s.peers[id] = peer
   488  
   489  	// Mark the peer as idle for every request type, even if no sync is running
   490  	s.accountIdlers[id] = struct{}{}
   491  	s.storageIdlers[id] = struct{}{}
   492  	s.bytecodeIdlers[id] = struct{}{}
   493  	s.trienodeHealIdlers[id] = struct{}{}
   494  	s.bytecodeHealIdlers[id] = struct{}{}
   495  	s.lock.Unlock()
   496  
   497  	// Notify any active syncs that a new peer can be assigned data (sent after releasing the lock)
   498  	s.peerJoin.Send(id)
   499  	return nil
   500  }
   501  
   502  // Unregister removes a data source from the syncer's peerset, or errors if the id is not registered.
   503  func (s *Syncer) Unregister(id string) error {
   504  	// Remove all traces of the peer from the registry
   505  	s.lock.Lock()
   506  	if _, ok := s.peers[id]; !ok {
   507  		log.Error("Snap peer not registered", "id", id)
   508  
   509  		s.lock.Unlock()
   510  		return errors.New("not registered")
   511  	}
   512  	delete(s.peers, id)
   513  
   514  	// Remove status markers, even if no sync is running
   515  	delete(s.statelessPeers, id) // No-op while the map is still nil (NewSyncer does not allocate it)
   516  
   517  	delete(s.accountIdlers, id)
   518  	delete(s.storageIdlers, id)
   519  	delete(s.bytecodeIdlers, id)
   520  	delete(s.trienodeHealIdlers, id)
   521  	delete(s.bytecodeHealIdlers, id)
   522  	s.lock.Unlock()
   523  
   524  	// Notify any active syncs that pending requests need to be reverted (sent after releasing the lock)
   525  	s.peerDrop.Send(id)
   526  	return nil
   527  }
   528  
   529  // Sync starts (or resumes a previous) sync cycle to iterate over a state trie
   530  // with the given root and reconstruct the nodes based on the snapshot leaves.
   531  // Previously downloaded segments will not be redownloaded or fixed, rather any
   532  // errors will be healed after the leaves are fully accumulated.
   533  func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
   534  	// Move the trie root from any previous value, revert stateless markers for
   535  	// any peers and initialize the syncer if it was not yet run
   536  	s.lock.Lock()
   537  	s.root = root
   538  	s.healer = &healTask{
   539  		scheduler: state.NewStateSync(root, s.db, nil, s.onHealState),
   540  		trieTasks: make(map[common.Hash]trie.SyncPath),
   541  		codeTasks: make(map[common.Hash]struct{}),
   542  	}
   543  	s.statelessPeers = make(map[string]struct{})
   544  	s.lock.Unlock()
   545  
   546  	if s.startTime == (time.Time{}) {
   547  		s.startTime = time.Now()
   548  	}
   549  	// Retrieve the previous sync status from LevelDB and abort if already synced
   550  	s.loadSyncStatus()
   551  	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   552  		log.Debug("Snapshot sync already completed")
   553  		return nil
   554  	}
   555  	defer func() { // Persist any progress, independent of failure
   556  		for _, task := range s.tasks {
   557  			s.forwardAccountTask(task)
   558  		}
   559  		s.cleanAccountTasks()
   560  		s.saveSyncStatus()
   561  	}()
   562  
   563  	log.Debug("Starting snapshot sync cycle", "root", root)
   564  
   565  	// Flush out the last committed raw states
   566  	defer func() {
   567  		if s.stateWriter.ValueSize() > 0 {
   568  			s.stateWriter.Write()
   569  			s.stateWriter.Reset()
   570  		}
   571  	}()
   572  	defer s.report(true)
   573  
   574  	// Whether sync completed or not, disregard any future packets
   575  	defer func() {
   576  		log.Debug("Terminating snapshot sync cycle", "root", root)
   577  		s.lock.Lock()
   578  		s.accountReqs = make(map[uint64]*accountRequest)
   579  		s.storageReqs = make(map[uint64]*storageRequest)
   580  		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
   581  		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
   582  		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
   583  		s.lock.Unlock()
   584  	}()
   585  	// Keep scheduling sync tasks
   586  	peerJoin := make(chan string, 16)
   587  	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
   588  	defer peerJoinSub.Unsubscribe()
   589  
   590  	peerDrop := make(chan string, 16)
   591  	peerDropSub := s.peerDrop.Subscribe(peerDrop)
   592  	defer peerDropSub.Unsubscribe()
   593  
   594  	// Create a set of unique channels for this sync cycle. We need these to be
   595  	// ephemeral so a data race doesn't accidentally deliver something stale on
   596  	// a persistent channel across syncs (yup, this happened)
   597  	var (
   598  		accountReqFails      = make(chan *accountRequest)
   599  		storageReqFails      = make(chan *storageRequest)
   600  		bytecodeReqFails     = make(chan *bytecodeRequest)
   601  		accountResps         = make(chan *accountResponse)
   602  		storageResps         = make(chan *storageResponse)
   603  		bytecodeResps        = make(chan *bytecodeResponse)
   604  		trienodeHealReqFails = make(chan *trienodeHealRequest)
   605  		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
   606  		trienodeHealResps    = make(chan *trienodeHealResponse)
   607  		bytecodeHealResps    = make(chan *bytecodeHealResponse)
   608  	)
   609  	for {
   610  		// Remove all completed tasks and terminate sync if everything's done
   611  		s.cleanStorageTasks()
   612  		s.cleanAccountTasks()
   613  		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   614  			return nil
   615  		}
   616  		// Assign all the data retrieval tasks to any free peers
   617  		s.assignAccountTasks(accountResps, accountReqFails, cancel)
   618  		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
   619  		s.assignStorageTasks(storageResps, storageReqFails, cancel)
   620  
   621  		if len(s.tasks) == 0 {
   622  			// Sync phase done, run heal phase
   623  			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
   624  			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
   625  		}
   626  		// Wait for something to happen
   627  		select {
   628  		case <-s.update:
   629  			// Something happened (new peer, delivery, timeout), recheck tasks
   630  		case <-peerJoin:
   631  			// A new peer joined, try to schedule it new tasks
   632  		case id := <-peerDrop:
   633  			s.revertRequests(id)
   634  		case <-cancel:
   635  			return ErrCancelled
   636  
   637  		case req := <-accountReqFails:
   638  			s.revertAccountRequest(req)
   639  		case req := <-bytecodeReqFails:
   640  			s.revertBytecodeRequest(req)
   641  		case req := <-storageReqFails:
   642  			s.revertStorageRequest(req)
   643  		case req := <-trienodeHealReqFails:
   644  			s.revertTrienodeHealRequest(req)
   645  		case req := <-bytecodeHealReqFails:
   646  			s.revertBytecodeHealRequest(req)
   647  
   648  		case res := <-accountResps:
   649  			s.processAccountResponse(res)
   650  		case res := <-bytecodeResps:
   651  			s.processBytecodeResponse(res)
   652  		case res := <-storageResps:
   653  			s.processStorageResponse(res)
   654  		case res := <-trienodeHealResps:
   655  			s.processTrienodeHealResponse(res)
   656  		case res := <-bytecodeHealResps:
   657  			s.processBytecodeHealResponse(res)
   658  		}
   659  		// Report stats if something meaningful happened
   660  		s.report(false)
   661  	}
   662  }
   663  
   664  // loadSyncStatus retrieves a previously aborted sync status from the database,
   665  // or generates a fresh one if none is available.
   666  func (s *Syncer) loadSyncStatus() {
   667  	var progress syncProgress
   668  
   669  	if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil {
   670  		if err := json.Unmarshal(status, &progress); err != nil {
   671  			log.Error("Failed to decode snap sync status", "err", err)
   672  		} else {
   673  			for _, task := range progress.Tasks {
   674  				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
   675  			}
   676  			s.tasks = progress.Tasks
   677  			for _, task := range s.tasks {
   678  				task.genBatch = ethdb.HookedBatch{
   679  					Batch: s.db.NewBatch(),
   680  					OnPut: func(key []byte, value []byte) {
   681  						s.accountBytes += common.StorageSize(len(key) + len(value))
   682  					},
   683  				}
   684  				task.genTrie = trie.NewStackTrie(task.genBatch)
   685  
   686  				for _, subtasks := range task.SubTasks {
   687  					for _, subtask := range subtasks {
   688  						subtask.genBatch = ethdb.HookedBatch{
   689  							Batch: s.db.NewBatch(),
   690  							OnPut: func(key []byte, value []byte) {
   691  								s.storageBytes += common.StorageSize(len(key) + len(value))
   692  							},
   693  						}
   694  						subtask.genTrie = trie.NewStackTrie(subtask.genBatch)
   695  					}
   696  				}
   697  			}
   698  			s.snapped = len(s.tasks) == 0
   699  
   700  			s.accountSynced = progress.AccountSynced
   701  			s.accountBytes = progress.AccountBytes
   702  			s.bytecodeSynced = progress.BytecodeSynced
   703  			s.bytecodeBytes = progress.BytecodeBytes
   704  			s.storageSynced = progress.StorageSynced
   705  			s.storageBytes = progress.StorageBytes
   706  
   707  			s.trienodeHealSynced = progress.TrienodeHealSynced
   708  			s.trienodeHealBytes = progress.TrienodeHealBytes
   709  			s.bytecodeHealSynced = progress.BytecodeHealSynced
   710  			s.bytecodeHealBytes = progress.BytecodeHealBytes
   711  			return
   712  		}
   713  	}
   714  	// Either we've failed to decode the previus state, or there was none.
   715  	// Start a fresh sync by chunking up the account range and scheduling
   716  	// them for retrieval.
   717  	s.tasks = nil
   718  	s.accountSynced, s.accountBytes = 0, 0
   719  	s.bytecodeSynced, s.bytecodeBytes = 0, 0
   720  	s.storageSynced, s.storageBytes = 0, 0
   721  	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
   722  	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0
   723  
   724  	var next common.Hash
   725  	step := new(big.Int).Sub(
   726  		new(big.Int).Div(
   727  			new(big.Int).Exp(common.Big2, common.Big256, nil),
   728  			big.NewInt(int64(accountConcurrency)),
   729  		), common.Big1,
   730  	)
   731  	for i := 0; i < accountConcurrency; i++ {
   732  		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
   733  		if i == accountConcurrency-1 {
   734  			// Make sure we don't overflow if the step is not a proper divisor
   735  			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
   736  		}
   737  		batch := ethdb.HookedBatch{
   738  			Batch: s.db.NewBatch(),
   739  			OnPut: func(key []byte, value []byte) {
   740  				s.accountBytes += common.StorageSize(len(key) + len(value))
   741  			},
   742  		}
   743  		s.tasks = append(s.tasks, &accountTask{
   744  			Next:     next,
   745  			Last:     last,
   746  			SubTasks: make(map[common.Hash][]*storageTask),
   747  			genBatch: batch,
   748  			genTrie:  trie.NewStackTrie(batch),
   749  		})
   750  		log.Debug("Created account sync task", "from", next, "last", last)
   751  		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
   752  	}
   753  }
   754  
   755  // saveSyncStatus marshals the remaining sync tasks into leveldb.
   756  func (s *Syncer) saveSyncStatus() {
   757  	// Serialize any partial progress to disk before spinning down
   758  	for _, task := range s.tasks {
   759  		if err := task.genBatch.Write(); err != nil {
   760  			log.Error("Failed to persist account slots", "err", err)
   761  		}
   762  		for _, subtasks := range task.SubTasks {
   763  			for _, subtask := range subtasks {
   764  				if err := subtask.genBatch.Write(); err != nil {
   765  					log.Error("Failed to persist storage slots", "err", err)
   766  				}
   767  			}
   768  		}
   769  	}
   770  	// Store the actual progress markers
   771  	progress := &syncProgress{
   772  		Tasks:              s.tasks,
   773  		AccountSynced:      s.accountSynced,
   774  		AccountBytes:       s.accountBytes,
   775  		BytecodeSynced:     s.bytecodeSynced,
   776  		BytecodeBytes:      s.bytecodeBytes,
   777  		StorageSynced:      s.storageSynced,
   778  		StorageBytes:       s.storageBytes,
   779  		TrienodeHealSynced: s.trienodeHealSynced,
   780  		TrienodeHealBytes:  s.trienodeHealBytes,
   781  		BytecodeHealSynced: s.bytecodeHealSynced,
   782  		BytecodeHealBytes:  s.bytecodeHealBytes,
   783  	}
   784  	status, err := json.Marshal(progress)
   785  	if err != nil {
   786  		panic(err) // This can only fail during implementation
   787  	}
   788  	rawdb.WriteSnapshotSyncStatus(s.db, status)
   789  }
   790  
   791  // cleanAccountTasks removes account range retrieval tasks that have already been
   792  // completed.
   793  func (s *Syncer) cleanAccountTasks() {
   794  	// If the sync was already done before, don't even bother
   795  	if len(s.tasks) == 0 {
   796  		return
   797  	}
   798  	// Sync wasn't finished previously, check for any task that can be finalized
   799  	for i := 0; i < len(s.tasks); i++ {
   800  		if s.tasks[i].done {
   801  			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
   802  			i--
   803  		}
   804  	}
   805  	// If everything was just finalized just, generate the account trie and start heal
   806  	if len(s.tasks) == 0 {
   807  		s.lock.Lock()
   808  		s.snapped = true
   809  		s.lock.Unlock()
   810  
   811  		// Push the final sync report
   812  		s.reportSyncProgress(true)
   813  	}
   814  }
   815  
   816  // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
   817  // within, cleaning any that have been completed.
   818  func (s *Syncer) cleanStorageTasks() {
   819  	for _, task := range s.tasks {
   820  		for account, subtasks := range task.SubTasks {
   821  			// Remove storage range retrieval tasks that completed
   822  			for j := 0; j < len(subtasks); j++ {
   823  				if subtasks[j].done {
   824  					subtasks = append(subtasks[:j], subtasks[j+1:]...)
   825  					j--
   826  				}
   827  			}
   828  			if len(subtasks) > 0 {
   829  				task.SubTasks[account] = subtasks
   830  				continue
   831  			}
   832  			// If all storage chunks are done, mark the account as done too
   833  			for j, hash := range task.res.hashes {
   834  				if hash == account {
   835  					task.needState[j] = false
   836  				}
   837  			}
   838  			delete(task.SubTasks, account)
   839  			task.pend--
   840  
   841  			// If this was the last pending task, forward the account task
   842  			if task.pend == 0 {
   843  				s.forwardAccountTask(task)
   844  			}
   845  		}
   846  	}
   847  }
   848  
   849  // assignAccountTasks attempts to match idle peers to pending account range
   850  // retrievals.
   851  func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
   852  	s.lock.Lock()
   853  	defer s.lock.Unlock()
   854  
   855  	// If there are no idle peers, short circuit assignment
   856  	if len(s.accountIdlers) == 0 {
   857  		return
   858  	}
   859  	// Iterate over all the tasks and try to find a pending one
   860  	for _, task := range s.tasks {
   861  		// Skip any tasks already filling
   862  		if task.req != nil || task.res != nil {
   863  			continue
   864  		}
   865  		// Task pending retrieval, try to find an idle peer. If no such peer
   866  		// exists, we probably assigned tasks for all (or they are stateless).
   867  		// Abort the entire assignment mechanism.
   868  		var idle string
   869  		for id := range s.accountIdlers {
   870  			// If the peer rejected a query in this sync cycle, don't bother asking
   871  			// again for anything, it's either out of sync or already pruned
   872  			if _, ok := s.statelessPeers[id]; ok {
   873  				continue
   874  			}
   875  			idle = id
   876  			break
   877  		}
   878  		if idle == "" {
   879  			return
   880  		}
   881  		peer := s.peers[idle]
   882  
   883  		// Matched a pending task to an idle peer, allocate a unique request id
   884  		var reqid uint64
   885  		for {
   886  			reqid = uint64(rand.Int63())
   887  			if reqid == 0 {
   888  				continue
   889  			}
   890  			if _, ok := s.accountReqs[reqid]; ok {
   891  				continue
   892  			}
   893  			break
   894  		}
   895  		// Generate the network query and send it to the peer
   896  		req := &accountRequest{
   897  			peer:    idle,
   898  			id:      reqid,
   899  			deliver: success,
   900  			revert:  fail,
   901  			cancel:  cancel,
   902  			stale:   make(chan struct{}),
   903  			origin:  task.Next,
   904  			limit:   task.Last,
   905  			task:    task,
   906  		}
   907  		req.timeout = time.AfterFunc(requestTimeout, func() {
   908  			peer.Log().Debug("Account range request timed out", "reqid", reqid)
   909  			s.scheduleRevertAccountRequest(req)
   910  		})
   911  		s.accountReqs[reqid] = req
   912  		delete(s.accountIdlers, idle)
   913  
   914  		s.pend.Add(1)
   915  		root := s.root
   916  		gopool.Submit(func() {
   917  			defer s.pend.Done()
   918  
   919  			// Attempt to send the remote request and revert if it fails
   920  			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil {
   921  				peer.Log().Debug("Failed to request account range", "err", err)
   922  				s.scheduleRevertAccountRequest(req)
   923  			}
   924  		})
   925  
   926  		// Inject the request into the task to block further assignments
   927  		task.req = req
   928  	}
   929  }
   930  
   931  // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
   932  func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
   933  	s.lock.Lock()
   934  	defer s.lock.Unlock()
   935  
   936  	// If there are no idle peers, short circuit assignment
   937  	if len(s.bytecodeIdlers) == 0 {
   938  		return
   939  	}
   940  	// Iterate over all the tasks and try to find a pending one
   941  	for _, task := range s.tasks {
   942  		// Skip any tasks not in the bytecode retrieval phase
   943  		if task.res == nil {
   944  			continue
   945  		}
   946  		// Skip tasks that are already retrieving (or done with) all codes
   947  		if len(task.codeTasks) == 0 {
   948  			continue
   949  		}
   950  		// Task pending retrieval, try to find an idle peer. If no such peer
   951  		// exists, we probably assigned tasks for all (or they are stateless).
   952  		// Abort the entire assignment mechanism.
   953  		var idle string
   954  		for id := range s.bytecodeIdlers {
   955  			// If the peer rejected a query in this sync cycle, don't bother asking
   956  			// again for anything, it's either out of sync or already pruned
   957  			if _, ok := s.statelessPeers[id]; ok {
   958  				continue
   959  			}
   960  			idle = id
   961  			break
   962  		}
   963  		if idle == "" {
   964  			return
   965  		}
   966  		peer := s.peers[idle]
   967  
   968  		// Matched a pending task to an idle peer, allocate a unique request id
   969  		var reqid uint64
   970  		for {
   971  			reqid = uint64(rand.Int63())
   972  			if reqid == 0 {
   973  				continue
   974  			}
   975  			if _, ok := s.bytecodeReqs[reqid]; ok {
   976  				continue
   977  			}
   978  			break
   979  		}
   980  		// Generate the network query and send it to the peer
   981  		hashes := make([]common.Hash, 0, maxCodeRequestCount)
   982  		for hash := range task.codeTasks {
   983  			delete(task.codeTasks, hash)
   984  			hashes = append(hashes, hash)
   985  			if len(hashes) >= maxCodeRequestCount {
   986  				break
   987  			}
   988  		}
   989  		req := &bytecodeRequest{
   990  			peer:    idle,
   991  			id:      reqid,
   992  			deliver: success,
   993  			revert:  fail,
   994  			cancel:  cancel,
   995  			stale:   make(chan struct{}),
   996  			hashes:  hashes,
   997  			task:    task,
   998  		}
   999  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1000  			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
  1001  			s.scheduleRevertBytecodeRequest(req)
  1002  		})
  1003  		s.bytecodeReqs[reqid] = req
  1004  		delete(s.bytecodeIdlers, idle)
  1005  
  1006  		s.pend.Add(1)
  1007  		gopool.Submit(func() {
  1008  			defer s.pend.Done()
  1009  
  1010  			// Attempt to send the remote request and revert if it fails
  1011  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1012  				log.Debug("Failed to request bytecodes", "err", err)
  1013  				s.scheduleRevertBytecodeRequest(req)
  1014  			}
  1015  		})
  1016  	}
  1017  }
  1018  
  1019  // assignStorageTasks attempts to match idle peers to pending storage range
  1020  // retrievals.
  1021  func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
  1022  	s.lock.Lock()
  1023  	defer s.lock.Unlock()
  1024  
  1025  	// If there are no idle peers, short circuit assignment
  1026  	if len(s.storageIdlers) == 0 {
  1027  		return
  1028  	}
  1029  	// Iterate over all the tasks and try to find a pending one
  1030  	for _, task := range s.tasks {
  1031  		// Skip any tasks not in the storage retrieval phase
  1032  		if task.res == nil {
  1033  			continue
  1034  		}
  1035  		// Skip tasks that are already retrieving (or done with) all small states
  1036  		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
  1037  			continue
  1038  		}
  1039  		// Task pending retrieval, try to find an idle peer. If no such peer
  1040  		// exists, we probably assigned tasks for all (or they are stateless).
  1041  		// Abort the entire assignment mechanism.
  1042  		var idle string
  1043  		for id := range s.storageIdlers {
  1044  			// If the peer rejected a query in this sync cycle, don't bother asking
  1045  			// again for anything, it's either out of sync or already pruned
  1046  			if _, ok := s.statelessPeers[id]; ok {
  1047  				continue
  1048  			}
  1049  			idle = id
  1050  			break
  1051  		}
  1052  		if idle == "" {
  1053  			return
  1054  		}
  1055  		peer := s.peers[idle]
  1056  
  1057  		// Matched a pending task to an idle peer, allocate a unique request id
  1058  		var reqid uint64
  1059  		for {
  1060  			reqid = uint64(rand.Int63())
  1061  			if reqid == 0 {
  1062  				continue
  1063  			}
  1064  			if _, ok := s.storageReqs[reqid]; ok {
  1065  				continue
  1066  			}
  1067  			break
  1068  		}
  1069  		// Generate the network query and send it to the peer. If there are
  1070  		// large contract tasks pending, complete those before diving into
  1071  		// even more new contracts.
  1072  		var (
  1073  			accounts = make([]common.Hash, 0, maxStorageSetRequestCount)
  1074  			roots    = make([]common.Hash, 0, maxStorageSetRequestCount)
  1075  			subtask  *storageTask
  1076  		)
  1077  		for account, subtasks := range task.SubTasks {
  1078  			for _, st := range subtasks {
  1079  				// Skip any subtasks already filling
  1080  				if st.req != nil {
  1081  					continue
  1082  				}
  1083  				// Found an incomplete storage chunk, schedule it
  1084  				accounts = append(accounts, account)
  1085  				roots = append(roots, st.root)
  1086  				subtask = st
  1087  				break // Large contract chunks are downloaded individually
  1088  			}
  1089  			if subtask != nil {
  1090  				break // Large contract chunks are downloaded individually
  1091  			}
  1092  		}
  1093  		if subtask == nil {
  1094  			// No large contract required retrieval, but small ones available
  1095  			for acccount, root := range task.stateTasks {
  1096  				delete(task.stateTasks, acccount)
  1097  
  1098  				accounts = append(accounts, acccount)
  1099  				roots = append(roots, root)
  1100  
  1101  				if len(accounts) >= maxStorageSetRequestCount {
  1102  					break
  1103  				}
  1104  			}
  1105  		}
  1106  		// If nothing was found, it means this task is actually already fully
  1107  		// retrieving, but large contracts are hard to detect. Skip to the next.
  1108  		if len(accounts) == 0 {
  1109  			continue
  1110  		}
  1111  		req := &storageRequest{
  1112  			peer:     idle,
  1113  			id:       reqid,
  1114  			deliver:  success,
  1115  			revert:   fail,
  1116  			cancel:   cancel,
  1117  			stale:    make(chan struct{}),
  1118  			accounts: accounts,
  1119  			roots:    roots,
  1120  			mainTask: task,
  1121  			subTask:  subtask,
  1122  		}
  1123  		if subtask != nil {
  1124  			req.origin = subtask.Next
  1125  			req.limit = subtask.Last
  1126  		}
  1127  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1128  			peer.Log().Debug("Storage request timed out", "reqid", reqid)
  1129  			s.scheduleRevertStorageRequest(req)
  1130  		})
  1131  		s.storageReqs[reqid] = req
  1132  		delete(s.storageIdlers, idle)
  1133  
  1134  		s.pend.Add(1)
  1135  		root := s.root
  1136  		gopool.Submit(func() {
  1137  			defer s.pend.Done()
  1138  
  1139  			// Attempt to send the remote request and revert if it fails
  1140  			var origin, limit []byte
  1141  			if subtask != nil {
  1142  				origin, limit = req.origin[:], req.limit[:]
  1143  			}
  1144  			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil {
  1145  				log.Debug("Failed to request storage", "err", err)
  1146  				s.scheduleRevertStorageRequest(req)
  1147  			}
  1148  		})
  1149  
  1150  		// Inject the request into the subtask to block further assignments
  1151  		if subtask != nil {
  1152  			subtask.req = req
  1153  		}
  1154  	}
  1155  }
  1156  
  1157  // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
  1158  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1159  func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
  1160  	s.lock.Lock()
  1161  	defer s.lock.Unlock()
  1162  
  1163  	// If there are no idle peers, short circuit assignment
  1164  	if len(s.trienodeHealIdlers) == 0 {
  1165  		return
  1166  	}
  1167  	// Iterate over pending tasks and try to find a peer to retrieve with
  1168  	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1169  		// If there are not enough trie tasks queued to fully assign, fill the
  1170  		// queue from the state sync scheduler. The trie synced schedules these
  1171  		// together with bytecodes, so we need to queue them combined.
  1172  		var (
  1173  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1174  			want = maxTrieRequestCount + maxCodeRequestCount
  1175  		)
  1176  		if have < want {
  1177  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1178  			for i, hash := range nodes {
  1179  				s.healer.trieTasks[hash] = paths[i]
  1180  			}
  1181  			for _, hash := range codes {
  1182  				s.healer.codeTasks[hash] = struct{}{}
  1183  			}
  1184  		}
  1185  		// If all the heal tasks are bytecodes or already downloading, bail
  1186  		if len(s.healer.trieTasks) == 0 {
  1187  			return
  1188  		}
  1189  		// Task pending retrieval, try to find an idle peer. If no such peer
  1190  		// exists, we probably assigned tasks for all (or they are stateless).
  1191  		// Abort the entire assignment mechanism.
  1192  		var idle string
  1193  		for id := range s.trienodeHealIdlers {
  1194  			// If the peer rejected a query in this sync cycle, don't bother asking
  1195  			// again for anything, it's either out of sync or already pruned
  1196  			if _, ok := s.statelessPeers[id]; ok {
  1197  				continue
  1198  			}
  1199  			idle = id
  1200  			break
  1201  		}
  1202  		if idle == "" {
  1203  			return
  1204  		}
  1205  		peer := s.peers[idle]
  1206  
  1207  		// Matched a pending task to an idle peer, allocate a unique request id
  1208  		var reqid uint64
  1209  		for {
  1210  			reqid = uint64(rand.Int63())
  1211  			if reqid == 0 {
  1212  				continue
  1213  			}
  1214  			if _, ok := s.trienodeHealReqs[reqid]; ok {
  1215  				continue
  1216  			}
  1217  			break
  1218  		}
  1219  		// Generate the network query and send it to the peer
  1220  		var (
  1221  			hashes   = make([]common.Hash, 0, maxTrieRequestCount)
  1222  			paths    = make([]trie.SyncPath, 0, maxTrieRequestCount)
  1223  			pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount)
  1224  		)
  1225  		for hash, pathset := range s.healer.trieTasks {
  1226  			delete(s.healer.trieTasks, hash)
  1227  
  1228  			hashes = append(hashes, hash)
  1229  			paths = append(paths, pathset)
  1230  			pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash
  1231  
  1232  			if len(hashes) >= maxTrieRequestCount {
  1233  				break
  1234  			}
  1235  		}
  1236  		req := &trienodeHealRequest{
  1237  			peer:    idle,
  1238  			id:      reqid,
  1239  			deliver: success,
  1240  			revert:  fail,
  1241  			cancel:  cancel,
  1242  			stale:   make(chan struct{}),
  1243  			hashes:  hashes,
  1244  			paths:   paths,
  1245  			task:    s.healer,
  1246  		}
  1247  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1248  			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
  1249  			s.scheduleRevertTrienodeHealRequest(req)
  1250  		})
  1251  		s.trienodeHealReqs[reqid] = req
  1252  		delete(s.trienodeHealIdlers, idle)
  1253  
  1254  		s.pend.Add(1)
  1255  		root := s.root
  1256  		gopool.Submit(func() {
  1257  			defer s.pend.Done()
  1258  
  1259  			// Attempt to send the remote request and revert if it fails
  1260  			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
  1261  				log.Debug("Failed to request trienode healers", "err", err)
  1262  				s.scheduleRevertTrienodeHealRequest(req)
  1263  			}
  1264  		})
  1265  	}
  1266  }
  1267  
  1268  // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
  1269  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1270  func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
  1271  	s.lock.Lock()
  1272  	defer s.lock.Unlock()
  1273  
  1274  	// If there are no idle peers, short circuit assignment
  1275  	if len(s.bytecodeHealIdlers) == 0 {
  1276  		return
  1277  	}
  1278  	// Iterate over pending tasks and try to find a peer to retrieve with
  1279  	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1280  		// If there are not enough trie tasks queued to fully assign, fill the
  1281  		// queue from the state sync scheduler. The trie synced schedules these
  1282  		// together with trie nodes, so we need to queue them combined.
  1283  		var (
  1284  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1285  			want = maxTrieRequestCount + maxCodeRequestCount
  1286  		)
  1287  		if have < want {
  1288  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1289  			for i, hash := range nodes {
  1290  				s.healer.trieTasks[hash] = paths[i]
  1291  			}
  1292  			for _, hash := range codes {
  1293  				s.healer.codeTasks[hash] = struct{}{}
  1294  			}
  1295  		}
  1296  		// If all the heal tasks are trienodes or already downloading, bail
  1297  		if len(s.healer.codeTasks) == 0 {
  1298  			return
  1299  		}
  1300  		// Task pending retrieval, try to find an idle peer. If no such peer
  1301  		// exists, we probably assigned tasks for all (or they are stateless).
  1302  		// Abort the entire assignment mechanism.
  1303  		var idle string
  1304  		for id := range s.bytecodeHealIdlers {
  1305  			// If the peer rejected a query in this sync cycle, don't bother asking
  1306  			// again for anything, it's either out of sync or already pruned
  1307  			if _, ok := s.statelessPeers[id]; ok {
  1308  				continue
  1309  			}
  1310  			idle = id
  1311  			break
  1312  		}
  1313  		if idle == "" {
  1314  			return
  1315  		}
  1316  		peer := s.peers[idle]
  1317  
  1318  		// Matched a pending task to an idle peer, allocate a unique request id
  1319  		var reqid uint64
  1320  		for {
  1321  			reqid = uint64(rand.Int63())
  1322  			if reqid == 0 {
  1323  				continue
  1324  			}
  1325  			if _, ok := s.bytecodeHealReqs[reqid]; ok {
  1326  				continue
  1327  			}
  1328  			break
  1329  		}
  1330  		// Generate the network query and send it to the peer
  1331  		hashes := make([]common.Hash, 0, maxCodeRequestCount)
  1332  		for hash := range s.healer.codeTasks {
  1333  			delete(s.healer.codeTasks, hash)
  1334  
  1335  			hashes = append(hashes, hash)
  1336  			if len(hashes) >= maxCodeRequestCount {
  1337  				break
  1338  			}
  1339  		}
  1340  		req := &bytecodeHealRequest{
  1341  			peer:    idle,
  1342  			id:      reqid,
  1343  			deliver: success,
  1344  			revert:  fail,
  1345  			cancel:  cancel,
  1346  			stale:   make(chan struct{}),
  1347  			hashes:  hashes,
  1348  			task:    s.healer,
  1349  		}
  1350  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1351  			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
  1352  			s.scheduleRevertBytecodeHealRequest(req)
  1353  		})
  1354  		s.bytecodeHealReqs[reqid] = req
  1355  		delete(s.bytecodeHealIdlers, idle)
  1356  
  1357  		s.pend.Add(1)
  1358  		gopool.Submit(func() {
  1359  			defer s.pend.Done()
  1360  
  1361  			// Attempt to send the remote request and revert if it fails
  1362  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1363  				log.Debug("Failed to request bytecode healers", "err", err)
  1364  				s.scheduleRevertBytecodeHealRequest(req)
  1365  			}
  1366  		})
  1367  	}
  1368  }
  1369  
  1370  // revertRequests locates all the currently pending reuqests from a particular
  1371  // peer and reverts them, rescheduling for others to fulfill.
  1372  func (s *Syncer) revertRequests(peer string) {
  1373  	// Gather the requests first, revertals need the lock too
  1374  	s.lock.Lock()
  1375  	var accountReqs []*accountRequest
  1376  	for _, req := range s.accountReqs {
  1377  		if req.peer == peer {
  1378  			accountReqs = append(accountReqs, req)
  1379  		}
  1380  	}
  1381  	var bytecodeReqs []*bytecodeRequest
  1382  	for _, req := range s.bytecodeReqs {
  1383  		if req.peer == peer {
  1384  			bytecodeReqs = append(bytecodeReqs, req)
  1385  		}
  1386  	}
  1387  	var storageReqs []*storageRequest
  1388  	for _, req := range s.storageReqs {
  1389  		if req.peer == peer {
  1390  			storageReqs = append(storageReqs, req)
  1391  		}
  1392  	}
  1393  	var trienodeHealReqs []*trienodeHealRequest
  1394  	for _, req := range s.trienodeHealReqs {
  1395  		if req.peer == peer {
  1396  			trienodeHealReqs = append(trienodeHealReqs, req)
  1397  		}
  1398  	}
  1399  	var bytecodeHealReqs []*bytecodeHealRequest
  1400  	for _, req := range s.bytecodeHealReqs {
  1401  		if req.peer == peer {
  1402  			bytecodeHealReqs = append(bytecodeHealReqs, req)
  1403  		}
  1404  	}
  1405  	s.lock.Unlock()
  1406  
  1407  	// Revert all the requests matching the peer
  1408  	for _, req := range accountReqs {
  1409  		s.revertAccountRequest(req)
  1410  	}
  1411  	for _, req := range bytecodeReqs {
  1412  		s.revertBytecodeRequest(req)
  1413  	}
  1414  	for _, req := range storageReqs {
  1415  		s.revertStorageRequest(req)
  1416  	}
  1417  	for _, req := range trienodeHealReqs {
  1418  		s.revertTrienodeHealRequest(req)
  1419  	}
  1420  	for _, req := range bytecodeHealReqs {
  1421  		s.revertBytecodeHealRequest(req)
  1422  	}
  1423  }
  1424  
  1425  // scheduleRevertAccountRequest asks the event loop to clean up an account range
  1426  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1427  func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
  1428  	select {
  1429  	case req.revert <- req:
  1430  		// Sync event loop notified
  1431  	case <-req.cancel:
  1432  		// Sync cycle got cancelled
  1433  	case <-req.stale:
  1434  		// Request already reverted
  1435  	}
  1436  }
  1437  
  1438  // revertAccountRequest cleans up an account range request and returns all failed
  1439  // retrieval tasks to the scheduler for reassignment.
  1440  //
  1441  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1442  // On peer threads, use scheduleRevertAccountRequest.
  1443  func (s *Syncer) revertAccountRequest(req *accountRequest) {
  1444  	log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
  1445  	select {
  1446  	case <-req.stale:
  1447  		log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
  1448  		return
  1449  	default:
  1450  	}
  1451  	close(req.stale)
  1452  
  1453  	// Remove the request from the tracked set
  1454  	s.lock.Lock()
  1455  	delete(s.accountReqs, req.id)
  1456  	s.lock.Unlock()
  1457  
  1458  	// If there's a timeout timer still running, abort it and mark the account
  1459  	// task as not-pending, ready for resheduling
  1460  	req.timeout.Stop()
  1461  	if req.task.req == req {
  1462  		req.task.req = nil
  1463  	}
  1464  }
  1465  
  1466  // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
  1467  // and return all failed retrieval tasks to the scheduler for reassignment.
  1468  func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
  1469  	select {
  1470  	case req.revert <- req:
  1471  		// Sync event loop notified
  1472  	case <-req.cancel:
  1473  		// Sync cycle got cancelled
  1474  	case <-req.stale:
  1475  		// Request already reverted
  1476  	}
  1477  }
  1478  
  1479  // revertBytecodeRequest cleans up a bytecode request and returns all failed
  1480  // retrieval tasks to the scheduler for reassignment.
  1481  //
  1482  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1483  // On peer threads, use scheduleRevertBytecodeRequest.
  1484  func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
  1485  	log.Debug("Reverting bytecode request", "peer", req.peer)
  1486  	select {
  1487  	case <-req.stale:
  1488  		log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id)
  1489  		return
  1490  	default:
  1491  	}
  1492  	close(req.stale)
  1493  
  1494  	// Remove the request from the tracked set
  1495  	s.lock.Lock()
  1496  	delete(s.bytecodeReqs, req.id)
  1497  	s.lock.Unlock()
  1498  
  1499  	// If there's a timeout timer still running, abort it and mark the code
  1500  	// retrievals as not-pending, ready for resheduling
  1501  	req.timeout.Stop()
  1502  	for _, hash := range req.hashes {
  1503  		req.task.codeTasks[hash] = struct{}{}
  1504  	}
  1505  }
  1506  
  1507  // scheduleRevertStorageRequest asks the event loop to clean up a storage range
  1508  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1509  func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
  1510  	select {
  1511  	case req.revert <- req:
  1512  		// Sync event loop notified
  1513  	case <-req.cancel:
  1514  		// Sync cycle got cancelled
  1515  	case <-req.stale:
  1516  		// Request already reverted
  1517  	}
  1518  }
  1519  
  1520  // revertStorageRequest cleans up a storage range request and returns all failed
  1521  // retrieval tasks to the scheduler for reassignment.
  1522  //
  1523  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1524  // On peer threads, use scheduleRevertStorageRequest.
  1525  func (s *Syncer) revertStorageRequest(req *storageRequest) {
  1526  	log.Debug("Reverting storage request", "peer", req.peer)
  1527  	select {
  1528  	case <-req.stale:
  1529  		log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id)
  1530  		return
  1531  	default:
  1532  	}
  1533  	close(req.stale)
  1534  
  1535  	// Remove the request from the tracked set
  1536  	s.lock.Lock()
  1537  	delete(s.storageReqs, req.id)
  1538  	s.lock.Unlock()
  1539  
  1540  	// If there's a timeout timer still running, abort it and mark the storage
  1541  	// task as not-pending, ready for resheduling
  1542  	req.timeout.Stop()
  1543  	if req.subTask != nil {
  1544  		req.subTask.req = nil
  1545  	} else {
  1546  		for i, account := range req.accounts {
  1547  			req.mainTask.stateTasks[account] = req.roots[i]
  1548  		}
  1549  	}
  1550  }
  1551  
  1552  // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal
  1553  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1554  func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
  1555  	select {
  1556  	case req.revert <- req:
  1557  		// Sync event loop notified
  1558  	case <-req.cancel:
  1559  		// Sync cycle got cancelled
  1560  	case <-req.stale:
  1561  		// Request already reverted
  1562  	}
  1563  }
  1564  
  1565  // revertTrienodeHealRequest cleans up a trienode heal request and returns all
  1566  // failed retrieval tasks to the scheduler for reassignment.
  1567  //
  1568  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1569  // On peer threads, use scheduleRevertTrienodeHealRequest.
  1570  func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
  1571  	log.Debug("Reverting trienode heal request", "peer", req.peer)
  1572  	select {
  1573  	case <-req.stale:
  1574  		log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1575  		return
  1576  	default:
  1577  	}
  1578  	close(req.stale)
  1579  
  1580  	// Remove the request from the tracked set
  1581  	s.lock.Lock()
  1582  	delete(s.trienodeHealReqs, req.id)
  1583  	s.lock.Unlock()
  1584  
  1585  	// If there's a timeout timer still running, abort it and mark the trie node
  1586  	// retrievals as not-pending, ready for resheduling
  1587  	req.timeout.Stop()
  1588  	for i, hash := range req.hashes {
  1589  		req.task.trieTasks[hash] = req.paths[i]
  1590  	}
  1591  }
  1592  
  1593  // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
  1594  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1595  func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
  1596  	select {
  1597  	case req.revert <- req:
  1598  		// Sync event loop notified
  1599  	case <-req.cancel:
  1600  		// Sync cycle got cancelled
  1601  	case <-req.stale:
  1602  		// Request already reverted
  1603  	}
  1604  }
  1605  
  1606  // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
  1607  // failed retrieval tasks to the scheduler for reassignment.
  1608  //
  1609  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1610  // On peer threads, use scheduleRevertBytecodeHealRequest.
  1611  func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
  1612  	log.Debug("Reverting bytecode heal request", "peer", req.peer)
  1613  	select {
  1614  	case <-req.stale:
  1615  		log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1616  		return
  1617  	default:
  1618  	}
  1619  	close(req.stale)
  1620  
  1621  	// Remove the request from the tracked set
  1622  	s.lock.Lock()
  1623  	delete(s.bytecodeHealReqs, req.id)
  1624  	s.lock.Unlock()
  1625  
  1626  	// If there's a timeout timer still running, abort it and mark the code
  1627  	// retrievals as not-pending, ready for resheduling
  1628  	req.timeout.Stop()
  1629  	for _, hash := range req.hashes {
  1630  		req.task.codeTasks[hash] = struct{}{}
  1631  	}
  1632  }
  1633  
  1634  // processAccountResponse integrates an already validated account range response
  1635  // into the account tasks.
  1636  func (s *Syncer) processAccountResponse(res *accountResponse) {
  1637  	// Switch the task from pending to filling
  1638  	res.task.req = nil
  1639  	res.task.res = res
  1640  
  1641  	// Ensure that the response doesn't overflow into the subsequent task
  1642  	last := res.task.Last.Big()
  1643  	for i, hash := range res.hashes {
  1644  		// Mark the range complete if the last is already included.
  1645  		// Keep iteration to delete the extra states if exists.
  1646  		cmp := hash.Big().Cmp(last)
  1647  		if cmp == 0 {
  1648  			res.cont = false
  1649  			continue
  1650  		}
  1651  		if cmp > 0 {
  1652  			// Chunk overflown, cut off excess
  1653  			res.hashes = res.hashes[:i]
  1654  			res.accounts = res.accounts[:i]
  1655  			res.cont = false // Mark range completed
  1656  			break
  1657  		}
  1658  	}
  1659  	// Iterate over all the accounts and assemble which ones need further sub-
  1660  	// filling before the entire account range can be persisted.
  1661  	res.task.needCode = make([]bool, len(res.accounts))
  1662  	res.task.needState = make([]bool, len(res.accounts))
  1663  	res.task.needHeal = make([]bool, len(res.accounts))
  1664  
  1665  	res.task.codeTasks = make(map[common.Hash]struct{})
  1666  	res.task.stateTasks = make(map[common.Hash]common.Hash)
  1667  
  1668  	resumed := make(map[common.Hash]struct{})
  1669  
  1670  	res.task.pend = 0
  1671  	for i, account := range res.accounts {
  1672  		// Check if the account is a contract with an unknown code
  1673  		if !bytes.Equal(account.CodeHash, emptyCode[:]) {
  1674  			if code := rawdb.ReadCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)); code == nil {
  1675  				res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{}
  1676  				res.task.needCode[i] = true
  1677  				res.task.pend++
  1678  			}
  1679  		}
  1680  		// Check if the account is a contract with an unknown storage trie
  1681  		if account.Root != emptyRoot {
  1682  			if node, err := s.db.Get(account.Root[:]); err != nil || node == nil {
  1683  				// If there was a previous large state retrieval in progress,
  1684  				// don't restart it from scratch. This happens if a sync cycle
  1685  				// is interrupted and resumed later. However, *do* update the
  1686  				// previous root hash.
  1687  				if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
  1688  					log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root)
  1689  					for _, subtask := range subtasks {
  1690  						subtask.root = account.Root
  1691  					}
  1692  					res.task.needHeal[i] = true
  1693  					resumed[res.hashes[i]] = struct{}{}
  1694  				} else {
  1695  					res.task.stateTasks[res.hashes[i]] = account.Root
  1696  				}
  1697  				res.task.needState[i] = true
  1698  				res.task.pend++
  1699  			}
  1700  		}
  1701  	}
  1702  	// Delete any subtasks that have been aborted but not resumed. This may undo
  1703  	// some progress if a new peer gives us less accounts than an old one, but for
  1704  	// now we have to live with that.
  1705  	for hash := range res.task.SubTasks {
  1706  		if _, ok := resumed[hash]; !ok {
  1707  			log.Debug("Aborting suspended storage retrieval", "account", hash)
  1708  			delete(res.task.SubTasks, hash)
  1709  		}
  1710  	}
  1711  	// If the account range contained no contracts, or all have been fully filled
  1712  	// beforehand, short circuit storage filling and forward to the next task
  1713  	if res.task.pend == 0 {
  1714  		s.forwardAccountTask(res.task)
  1715  		return
  1716  	}
  1717  	// Some accounts are incomplete, leave as is for the storage and contract
  1718  	// task assigners to pick up and fill.
  1719  }
  1720  
  1721  // processBytecodeResponse integrates an already validated bytecode response
  1722  // into the account tasks.
  1723  func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
  1724  	batch := s.db.NewBatch()
  1725  
  1726  	var (
  1727  		codes uint64
  1728  	)
  1729  	for i, hash := range res.hashes {
  1730  		code := res.codes[i]
  1731  
  1732  		// If the bytecode was not delivered, reschedule it
  1733  		if code == nil {
  1734  			res.task.codeTasks[hash] = struct{}{}
  1735  			continue
  1736  		}
  1737  		// Code was delivered, mark it not needed any more
  1738  		for j, account := range res.task.res.accounts {
  1739  			if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) {
  1740  				res.task.needCode[j] = false
  1741  				res.task.pend--
  1742  			}
  1743  		}
  1744  		// Push the bytecode into a database batch
  1745  		codes++
  1746  		rawdb.WriteCode(batch, hash, code)
  1747  	}
  1748  	bytes := common.StorageSize(batch.ValueSize())
  1749  	if err := batch.Write(); err != nil {
  1750  		log.Crit("Failed to persist bytecodes", "err", err)
  1751  	}
  1752  	s.bytecodeSynced += codes
  1753  	s.bytecodeBytes += bytes
  1754  
  1755  	log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
  1756  
  1757  	// If this delivery completed the last pending task, forward the account task
  1758  	// to the next chunk
  1759  	if res.task.pend == 0 {
  1760  		s.forwardAccountTask(res.task)
  1761  		return
  1762  	}
  1763  	// Some accounts are still incomplete, leave as is for the storage and contract
  1764  	// task assigners to pick up and fill.
  1765  }
  1766  
// processStorageResponse integrates an already validated storage response
// into the account tasks.
//
// For every requested account the method either re-queues it in the main
// task's stateTasks (if no slots were delivered for it) or matches it against
// the main task's account response and updates the needState / needHeal / pend
// bookkeeping. If the last delivered contract is flagged as continued, it is
// switched to large contract mode: sub-tasks covering the hash space are created
// on first sight and the relevant sub-task is forwarded. All delivered slots are
// written to the flat storage snapshot; trie nodes are generated through stack
// tries. Finally the batches are flushed and, if nothing is pending on the main
// task any more, the account task is forwarded.
func (s *Syncer) processStorageResponse(res *storageResponse) {
	// Switch the subtask from pending to idle
	if res.subTask != nil {
		res.subTask.req = nil
	}
	// Batch for everything written directly by this response; every put is
	// accounted for in the storage byte counter
	batch := ethdb.HookedBatch{
		Batch: s.db.NewBatch(),
		OnPut: func(key []byte, value []byte) {
			s.storageBytes += common.StorageSize(len(key) + len(value))
		},
	}
	var (
		slots           int
		oldStorageBytes = s.storageBytes // baseline for the byte count logged at the end
	)
	// Iterate over all the accounts and reconstruct their storage tries from the
	// delivered slots
	for i, account := range res.accounts {
		// If the account was not delivered, reschedule it
		if i >= len(res.hashes) {
			res.mainTask.stateTasks[account] = res.roots[i]
			continue
		}
		// State was delivered, if complete mark as not needed any more, otherwise
		// mark the account as needing healing
		for j, hash := range res.mainTask.res.hashes {
			if account != hash {
				continue
			}
			acc := res.mainTask.res.accounts[j]

			// If the packet contains multiple contract storage slots, all
			// but the last are surely complete. The last contract may be
			// chunked, so check its continuation flag.
			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
				res.mainTask.needState[j] = false
				res.mainTask.pend--
			}
			// If the last contract was chunked, mark it as needing healing
			// to avoid writing it out to disk prematurely.
			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
				res.mainTask.needHeal[j] = true
			}
			// If the last contract was chunked, we need to switch to large
			// contract handling mode
			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
				// If we haven't yet started a large-contract retrieval, create
				// the subtasks for it within the main account task
				if tasks, ok := res.mainTask.SubTasks[account]; !ok {
					var (
						keys    = res.hashes[i]
						chunks  = uint64(storageConcurrency)
						lastKey common.Hash
					)
					if len(keys) > 0 {
						lastKey = keys[len(keys)-1]
					}
					// If the number of slots remaining is low, decrease the
					// number of chunks. Somewhere on the order of 10-15K slots
					// fit into a packet of 500KB. A key/slot pair is maximum 64
					// bytes, so pessimistically maxRequestSize/64 = 8K.
					//
					// Chunk so that at least 2 packets are needed to fill a task.
					if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil {
						if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks {
							chunks = n + 1
						}
						log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks)
					} else {
						log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks)
					}
					r := newHashRange(lastKey, chunks)

					// Our first task is the one that was just filled by this response.
					// Note, this batch shadows the response-wide one above and
					// belongs to the subtask being created.
					batch := ethdb.HookedBatch{
						Batch: s.db.NewBatch(),
						OnPut: func(key []byte, value []byte) {
							s.storageBytes += common.StorageSize(len(key) + len(value))
						},
					}
					tasks = append(tasks, &storageTask{
						Next:     common.Hash{},
						Last:     r.End(),
						root:     acc.Root,
						genBatch: batch,
						genTrie:  trie.NewStackTrie(batch),
					})
					// Create one further subtask, each with its own batch and
					// stack trie, for every remaining range
					for r.Next() {
						batch := ethdb.HookedBatch{
							Batch: s.db.NewBatch(),
							OnPut: func(key []byte, value []byte) {
								s.storageBytes += common.StorageSize(len(key) + len(value))
							},
						}
						tasks = append(tasks, &storageTask{
							Next:     r.Start(),
							Last:     r.End(),
							root:     acc.Root,
							genBatch: batch,
							genTrie:  trie.NewStackTrie(batch),
						})
					}
					for _, task := range tasks {
						log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", task.Next, "last", task.Last)
					}
					res.mainTask.SubTasks[account] = tasks

					// Since we've just created the sub-tasks, this response
					// is surely for the first one (zero origin)
					res.subTask = tasks[0]
				}
			}
			// If we're in large contract delivery mode, forward the subtask
			if res.subTask != nil {
				// Ensure the response doesn't overflow into the subsequent task
				last := res.subTask.Last.Big()
				// Find the first overflowing key. While at it, mark res as complete
				// if we find the range to include or pass the 'last'
				index := sort.Search(len(res.hashes[i]), func(k int) bool {
					cmp := res.hashes[i][k].Big().Cmp(last)
					if cmp >= 0 {
						res.cont = false
					}
					return cmp > 0
				})
				// NOTE(review): sort.Search returns an index in [0, n], so this
				// condition always holds; without an overflowing key the slicing
				// below leaves both slices unchanged.
				if index >= 0 {
					// cut off excess
					res.hashes[i] = res.hashes[i][:index]
					res.slots[i] = res.slots[i][:index]
				}
				// Forward the relevant storage chunk (even if created just now)
				// NOTE(review): the continued branch indexes the last delivered
				// key, presumably a continued delivery is never empty - confirm
				// against the response validation.
				if res.cont {
					res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1])
				} else {
					res.subTask.done = true
				}
			}
		}
		// Iterate over all the complete contracts, reconstruct the trie nodes and
		// push them to disk. If the contract is chunked, the trie nodes will be
		// reconstructed later.
		slots += len(res.hashes[i])

		if i < len(res.hashes)-1 || res.subTask == nil {
			tr := trie.NewStackTrie(batch)
			for j := 0; j < len(res.hashes[i]); j++ {
				tr.Update(res.hashes[i][j][:], res.slots[i][j])
			}
			tr.Commit()
		}
		// Persist the received storage segments. This flat state may be
		// outdated during the sync, but it can be fixed later during the
		// snapshot generation.
		for j := 0; j < len(res.hashes[i]); j++ {
			rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j])

			// If we're storing large contracts, generate the trie nodes
			// on the fly to not trash the gluing points
			if i == len(res.hashes)-1 && res.subTask != nil {
				res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
			}
		}
	}
	// Large contracts could have generated new trie nodes, flush them to disk
	if res.subTask != nil {
		if res.subTask.done {
			if root, err := res.subTask.genTrie.Commit(); err != nil {
				log.Error("Failed to commit stack slots", "err", err)
			} else if root == res.subTask.root {
				// If the chunk's root is an overflown but full delivery, clear the heal request
				for i, account := range res.mainTask.res.hashes {
					if account == res.accounts[len(res.accounts)-1] {
						res.mainTask.needHeal[i] = false
					}
				}
			}
		}
		// Write out the subtask's own batch once it grew large enough or the
		// subtask is finished
		if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || res.subTask.done {
			if err := res.subTask.genBatch.Write(); err != nil {
				log.Error("Failed to persist stack slots", "err", err)
			}
			res.subTask.genBatch.Reset()
		}
	}
	// Flush anything written just now and update the stats
	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist storage slots", "err", err)
	}
	s.storageSynced += uint64(slots)

	log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes)

	// If this delivery completed the last pending task, forward the account task
	// to the next chunk
	if res.mainTask.pend == 0 {
		s.forwardAccountTask(res.mainTask)
		return
	}
	// Some accounts are still incomplete, leave as is for the storage and contract
	// task assigners to pick up and fill.
}
  1970  
  1971  // processTrienodeHealResponse integrates an already validated trienode response
  1972  // into the healer tasks.
  1973  func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
  1974  	for i, hash := range res.hashes {
  1975  		node := res.nodes[i]
  1976  
  1977  		// If the trie node was not delivered, reschedule it
  1978  		if node == nil {
  1979  			res.task.trieTasks[hash] = res.paths[i]
  1980  			continue
  1981  		}
  1982  		// Push the trie node into the state syncer
  1983  		s.trienodeHealSynced++
  1984  		s.trienodeHealBytes += common.StorageSize(len(node))
  1985  
  1986  		err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node})
  1987  		switch err {
  1988  		case nil:
  1989  		case trie.ErrAlreadyProcessed:
  1990  			s.trienodeHealDups++
  1991  		case trie.ErrNotRequested:
  1992  			s.trienodeHealNops++
  1993  		default:
  1994  			log.Error("Invalid trienode processed", "hash", hash, "err", err)
  1995  		}
  1996  	}
  1997  	batch := s.db.NewBatch()
  1998  	if err := s.healer.scheduler.Commit(batch); err != nil {
  1999  		log.Error("Failed to commit healing data", "err", err)
  2000  	}
  2001  	if err := batch.Write(); err != nil {
  2002  		log.Crit("Failed to persist healing data", "err", err)
  2003  	}
  2004  	log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
  2005  }
  2006  
  2007  // processBytecodeHealResponse integrates an already validated bytecode response
  2008  // into the healer tasks.
  2009  func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
  2010  	for i, hash := range res.hashes {
  2011  		node := res.codes[i]
  2012  
  2013  		// If the trie node was not delivered, reschedule it
  2014  		if node == nil {
  2015  			res.task.codeTasks[hash] = struct{}{}
  2016  			continue
  2017  		}
  2018  		// Push the trie node into the state syncer
  2019  		s.bytecodeHealSynced++
  2020  		s.bytecodeHealBytes += common.StorageSize(len(node))
  2021  
  2022  		err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node})
  2023  		switch err {
  2024  		case nil:
  2025  		case trie.ErrAlreadyProcessed:
  2026  			s.bytecodeHealDups++
  2027  		case trie.ErrNotRequested:
  2028  			s.bytecodeHealNops++
  2029  		default:
  2030  			log.Error("Invalid bytecode processed", "hash", hash, "err", err)
  2031  		}
  2032  	}
  2033  	batch := s.db.NewBatch()
  2034  	if err := s.healer.scheduler.Commit(batch); err != nil {
  2035  		log.Error("Failed to commit healing data", "err", err)
  2036  	}
  2037  	if err := batch.Write(); err != nil {
  2038  		log.Crit("Failed to persist healing data", "err", err)
  2039  	}
  2040  	log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
  2041  }
  2042  
  2043  // forwardAccountTask takes a filled account task and persists anything available
  2044  // into the database, after which it forwards the next account marker so that the
  2045  // task's next chunk may be filled.
  2046  func (s *Syncer) forwardAccountTask(task *accountTask) {
  2047  	// Remove any pending delivery
  2048  	res := task.res
  2049  	if res == nil {
  2050  		return // nothing to forward
  2051  	}
  2052  	task.res = nil
  2053  
  2054  	// Persist the received account segements. These flat state maybe
  2055  	// outdated during the sync, but it can be fixed later during the
  2056  	// snapshot generation.
  2057  	oldAccountBytes := s.accountBytes
  2058  
  2059  	batch := ethdb.HookedBatch{
  2060  		Batch: s.db.NewBatch(),
  2061  		OnPut: func(key []byte, value []byte) {
  2062  			s.accountBytes += common.StorageSize(len(key) + len(value))
  2063  		},
  2064  	}
  2065  	for i, hash := range res.hashes {
  2066  		if task.needCode[i] || task.needState[i] {
  2067  			break
  2068  		}
  2069  		slim := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash)
  2070  		rawdb.WriteAccountSnapshot(batch, hash, slim)
  2071  
  2072  		// If the task is complete, drop it into the stack trie to generate
  2073  		// account trie nodes for it
  2074  		if !task.needHeal[i] {
  2075  			full, err := snapshot.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted
  2076  			if err != nil {
  2077  				panic(err) // Really shouldn't ever happen
  2078  			}
  2079  			task.genTrie.Update(hash[:], full)
  2080  		}
  2081  	}
  2082  	// Flush anything written just now and update the stats
  2083  	if err := batch.Write(); err != nil {
  2084  		log.Crit("Failed to persist accounts", "err", err)
  2085  	}
  2086  	s.accountSynced += uint64(len(res.accounts))
  2087  
  2088  	// Task filling persisted, push it the chunk marker forward to the first
  2089  	// account still missing data.
  2090  	for i, hash := range res.hashes {
  2091  		if task.needCode[i] || task.needState[i] {
  2092  			return
  2093  		}
  2094  		task.Next = incHash(hash)
  2095  	}
  2096  	// All accounts marked as complete, track if the entire task is done
  2097  	task.done = !res.cont
  2098  
  2099  	// Stack trie could have generated trie nodes, push them to disk (we need to
  2100  	// flush after finalizing task.done. It's fine even if we crash and lose this
  2101  	// write as it will only cause more data to be downloaded during heal.
  2102  	if task.done {
  2103  		if _, err := task.genTrie.Commit(); err != nil {
  2104  			log.Error("Failed to commit stack account", "err", err)
  2105  		}
  2106  	}
  2107  	if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
  2108  		if err := task.genBatch.Write(); err != nil {
  2109  			log.Error("Failed to persist stack account", "err", err)
  2110  		}
  2111  		task.genBatch.Reset()
  2112  	}
  2113  	log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes)
  2114  }
  2115  
// OnAccounts is a callback method to invoke when a range of accounts are
// received from a remote peer.
//
// The peer is marked idle for account requests and the scheduler is nudged
// regardless of the response's validity. Responses for unknown requests, or for
// requests whose timeout already fired, are ignored and nil is returned. An
// entirely empty response marks the peer as stateless and schedules the request
// for reverting. Otherwise the range proof is verified against the current sync
// root: on failure the request is scheduled for reverting and the verification
// error is returned, on success the decoded accounts are delivered through the
// request's deliver channel.
func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
	// Tally up the size of the delivery, only used for the trace log below
	size := common.StorageSize(len(hashes) * common.HashLength)
	for _, account := range accounts {
		size += common.StorageSize(len(account))
	}
	for _, node := range proof {
		size += common.StorageSize(len(node))
	}
	logger := peer.Log().New("reqid", id)
	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)

	// Whether or not the response is valid, we can mark the peer as idle and
	// notify the scheduler to assign a new task. If the response is invalid,
	// we'll drop the peer in a bit.
	s.lock.Lock()
	if _, ok := s.peers[peer.ID()]; ok {
		s.accountIdlers[peer.ID()] = struct{}{}
	}
	// Non-blocking notification, skipped if the update channel cannot take it
	select {
	case s.update <- struct{}{}:
	default:
	}
	// Ensure the response is for a valid request
	req, ok := s.accountReqs[id]
	if !ok {
		// Request stale, perhaps the peer timed out but came through in the end
		logger.Warn("Unexpected account range packet")
		s.lock.Unlock()
		return nil
	}
	delete(s.accountReqs, id)

	// Clean up the request timeout timer, we'll see how to proceed further based
	// on the actual delivered content
	if !req.timeout.Stop() {
		// The timeout is already triggered, and this request will be reverted+rescheduled
		s.lock.Unlock()
		return nil
	}
	// Response is valid, but check if peer is signalling that it does not have
	// the requested data. For account range queries that means the state being
	// retrieved was either already pruned remotely, or the peer is not yet
	// synced to our head.
	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
		logger.Debug("Peer rejected account range request", "root", s.root)
		s.statelessPeers[peer.ID()] = struct{}{}
		s.lock.Unlock()

		// Signal this request as failed, and ready for rescheduling. The lock
		// is released first, the call may block on the revert channel.
		s.scheduleRevertAccountRequest(req)
		return nil
	}
	// Snapshot the sync root while still holding the lock
	root := s.root
	s.lock.Unlock()

	// Reconstruct a partial trie from the response and verify it
	keys := make([][]byte, len(hashes))
	for i, key := range hashes {
		keys[i] = common.CopyBytes(key[:])
	}
	nodes := make(light.NodeList, len(proof))
	for i, node := range proof {
		nodes[i] = node
	}
	proofdb := nodes.NodeSet()

	// The proven range ends at the last delivered key (nil if none came)
	var end []byte
	if len(keys) > 0 {
		end = keys[len(keys)-1]
	}
	cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
	if err != nil {
		logger.Warn("Account range failed proof", "err", err)
		// Signal this request as failed, and ready for rescheduling
		s.scheduleRevertAccountRequest(req)
		return err
	}
	accs := make([]*state.Account, len(accounts))
	for i, account := range accounts {
		acc := new(state.Account)
		if err := rlp.DecodeBytes(account, acc); err != nil {
			panic(err) // We created these blobs, we must be able to decode them
		}
		accs[i] = acc
	}
	response := &accountResponse{
		task:     req.task,
		hashes:   hashes,
		accounts: accs,
		cont:     cont,
	}
	// Hand the response over, unless the sync cycle was cancelled or the
	// request went stale in the meantime
	select {
	case req.deliver <- response:
	case <-req.cancel:
	case <-req.stale:
	}
	return nil
}
  2216  
  2217  // OnByteCodes is a callback method to invoke when a batch of contract
  2218  // bytes codes are received from a remote peer.
  2219  func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2220  	s.lock.RLock()
  2221  	syncing := !s.snapped
  2222  	s.lock.RUnlock()
  2223  
  2224  	if syncing {
  2225  		return s.onByteCodes(peer, id, bytecodes)
  2226  	}
  2227  	return s.onHealByteCodes(peer, id, bytecodes)
  2228  }
  2229  
  2230  // onByteCodes is a callback method to invoke when a batch of contract
  2231  // bytes codes are received from a remote peer in the syncing phase.
  2232  func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2233  	var size common.StorageSize
  2234  	for _, code := range bytecodes {
  2235  		size += common.StorageSize(len(code))
  2236  	}
  2237  	logger := peer.Log().New("reqid", id)
  2238  	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2239  
  2240  	// Whether or not the response is valid, we can mark the peer as idle and
  2241  	// notify the scheduler to assign a new task. If the response is invalid,
  2242  	// we'll drop the peer in a bit.
  2243  	s.lock.Lock()
  2244  	if _, ok := s.peers[peer.ID()]; ok {
  2245  		s.bytecodeIdlers[peer.ID()] = struct{}{}
  2246  	}
  2247  	select {
  2248  	case s.update <- struct{}{}:
  2249  	default:
  2250  	}
  2251  	// Ensure the response is for a valid request
  2252  	req, ok := s.bytecodeReqs[id]
  2253  	if !ok {
  2254  		// Request stale, perhaps the peer timed out but came through in the end
  2255  		logger.Warn("Unexpected bytecode packet")
  2256  		s.lock.Unlock()
  2257  		return nil
  2258  	}
  2259  	delete(s.bytecodeReqs, id)
  2260  
  2261  	// Clean up the request timeout timer, we'll see how to proceed further based
  2262  	// on the actual delivered content
  2263  	if !req.timeout.Stop() {
  2264  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2265  		s.lock.Unlock()
  2266  		return nil
  2267  	}
  2268  
  2269  	// Response is valid, but check if peer is signalling that it does not have
  2270  	// the requested data. For bytecode range queries that means the peer is not
  2271  	// yet synced.
  2272  	if len(bytecodes) == 0 {
  2273  		logger.Debug("Peer rejected bytecode request")
  2274  		s.statelessPeers[peer.ID()] = struct{}{}
  2275  		s.lock.Unlock()
  2276  
  2277  		// Signal this request as failed, and ready for rescheduling
  2278  		s.scheduleRevertBytecodeRequest(req)
  2279  		return nil
  2280  	}
  2281  	s.lock.Unlock()
  2282  
  2283  	// Cross reference the requested bytecodes with the response to find gaps
  2284  	// that the serving node is missing
  2285  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2286  	hash := make([]byte, 32)
  2287  
  2288  	codes := make([][]byte, len(req.hashes))
  2289  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2290  		// Find the next hash that we've been served, leaving misses with nils
  2291  		hasher.Reset()
  2292  		hasher.Write(bytecodes[i])
  2293  		hasher.Read(hash)
  2294  
  2295  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2296  			j++
  2297  		}
  2298  		if j < len(req.hashes) {
  2299  			codes[j] = bytecodes[i]
  2300  			j++
  2301  			continue
  2302  		}
  2303  		// We've either ran out of hashes, or got unrequested data
  2304  		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
  2305  		// Signal this request as failed, and ready for rescheduling
  2306  		s.scheduleRevertBytecodeRequest(req)
  2307  		return errors.New("unexpected bytecode")
  2308  	}
  2309  	// Response validated, send it to the scheduler for filling
  2310  	response := &bytecodeResponse{
  2311  		task:   req.task,
  2312  		hashes: req.hashes,
  2313  		codes:  codes,
  2314  	}
  2315  	select {
  2316  	case req.deliver <- response:
  2317  	case <-req.cancel:
  2318  	case <-req.stale:
  2319  	}
  2320  	return nil
  2321  }
  2322  
  2323  // OnStorage is a callback method to invoke when ranges of storage slots
  2324  // are received from a remote peer.
  2325  func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
  2326  	// Gather some trace stats to aid in debugging issues
  2327  	var (
  2328  		hashCount int
  2329  		slotCount int
  2330  		size      common.StorageSize
  2331  	)
  2332  	for _, hashset := range hashes {
  2333  		size += common.StorageSize(common.HashLength * len(hashset))
  2334  		hashCount += len(hashset)
  2335  	}
  2336  	for _, slotset := range slots {
  2337  		for _, slot := range slotset {
  2338  			size += common.StorageSize(len(slot))
  2339  		}
  2340  		slotCount += len(slotset)
  2341  	}
  2342  	for _, node := range proof {
  2343  		size += common.StorageSize(len(node))
  2344  	}
  2345  	logger := peer.Log().New("reqid", id)
  2346  	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)
  2347  
  2348  	// Whether or not the response is valid, we can mark the peer as idle and
  2349  	// notify the scheduler to assign a new task. If the response is invalid,
  2350  	// we'll drop the peer in a bit.
  2351  	s.lock.Lock()
  2352  	if _, ok := s.peers[peer.ID()]; ok {
  2353  		s.storageIdlers[peer.ID()] = struct{}{}
  2354  	}
  2355  	select {
  2356  	case s.update <- struct{}{}:
  2357  	default:
  2358  	}
  2359  	// Ensure the response is for a valid request
  2360  	req, ok := s.storageReqs[id]
  2361  	if !ok {
  2362  		// Request stale, perhaps the peer timed out but came through in the end
  2363  		logger.Warn("Unexpected storage ranges packet")
  2364  		s.lock.Unlock()
  2365  		return nil
  2366  	}
  2367  	delete(s.storageReqs, id)
  2368  
  2369  	// Clean up the request timeout timer, we'll see how to proceed further based
  2370  	// on the actual delivered content
  2371  	if !req.timeout.Stop() {
  2372  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2373  		s.lock.Unlock()
  2374  		return nil
  2375  	}
  2376  
  2377  	// Reject the response if the hash sets and slot sets don't match, or if the
  2378  	// peer sent more data than requested.
  2379  	if len(hashes) != len(slots) {
  2380  		s.lock.Unlock()
  2381  		s.scheduleRevertStorageRequest(req) // reschedule request
  2382  		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
  2383  		return errors.New("hash and slot set size mismatch")
  2384  	}
  2385  	if len(hashes) > len(req.accounts) {
  2386  		s.lock.Unlock()
  2387  		s.scheduleRevertStorageRequest(req) // reschedule request
  2388  		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
  2389  		return errors.New("hash set larger than requested")
  2390  	}
  2391  	// Response is valid, but check if peer is signalling that it does not have
  2392  	// the requested data. For storage range queries that means the state being
  2393  	// retrieved was either already pruned remotely, or the peer is not yet
  2394  	// synced to our head.
  2395  	if len(hashes) == 0 {
  2396  		logger.Debug("Peer rejected storage request")
  2397  		s.statelessPeers[peer.ID()] = struct{}{}
  2398  		s.lock.Unlock()
  2399  		s.scheduleRevertStorageRequest(req) // reschedule request
  2400  		return nil
  2401  	}
  2402  	s.lock.Unlock()
  2403  
  2404  	// Reconstruct the partial tries from the response and verify them
  2405  	var cont bool
  2406  
  2407  	for i := 0; i < len(hashes); i++ {
  2408  		// Convert the keys and proofs into an internal format
  2409  		keys := make([][]byte, len(hashes[i]))
  2410  		for j, key := range hashes[i] {
  2411  			keys[j] = common.CopyBytes(key[:])
  2412  		}
  2413  		nodes := make(light.NodeList, 0, len(proof))
  2414  		if i == len(hashes)-1 {
  2415  			for _, node := range proof {
  2416  				nodes = append(nodes, node)
  2417  			}
  2418  		}
  2419  		var err error
  2420  		if len(nodes) == 0 {
  2421  			// No proof has been attached, the response must cover the entire key
  2422  			// space and hash to the origin root.
  2423  			_, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
  2424  			if err != nil {
  2425  				s.scheduleRevertStorageRequest(req) // reschedule request
  2426  				logger.Warn("Storage slots failed proof", "err", err)
  2427  				return err
  2428  			}
  2429  		} else {
  2430  			// A proof was attached, the response is only partial, check that the
  2431  			// returned data is indeed part of the storage trie
  2432  			proofdb := nodes.NodeSet()
  2433  
  2434  			var end []byte
  2435  			if len(keys) > 0 {
  2436  				end = keys[len(keys)-1]
  2437  			}
  2438  			cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
  2439  			if err != nil {
  2440  				s.scheduleRevertStorageRequest(req) // reschedule request
  2441  				logger.Warn("Storage range failed proof", "err", err)
  2442  				return err
  2443  			}
  2444  		}
  2445  	}
  2446  	// Partial tries reconstructed, send them to the scheduler for storage filling
  2447  	response := &storageResponse{
  2448  		mainTask: req.mainTask,
  2449  		subTask:  req.subTask,
  2450  		accounts: req.accounts,
  2451  		roots:    req.roots,
  2452  		hashes:   hashes,
  2453  		slots:    slots,
  2454  		cont:     cont,
  2455  	}
  2456  	select {
  2457  	case req.deliver <- response:
  2458  	case <-req.cancel:
  2459  	case <-req.stale:
  2460  	}
  2461  	return nil
  2462  }
  2463  
  2464  // OnTrieNodes is a callback method to invoke when a batch of trie nodes
  2465  // are received from a remote peer.
  2466  func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
  2467  	var size common.StorageSize
  2468  	for _, node := range trienodes {
  2469  		size += common.StorageSize(len(node))
  2470  	}
  2471  	logger := peer.Log().New("reqid", id)
  2472  	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)
  2473  
  2474  	// Whether or not the response is valid, we can mark the peer as idle and
  2475  	// notify the scheduler to assign a new task. If the response is invalid,
  2476  	// we'll drop the peer in a bit.
  2477  	s.lock.Lock()
  2478  	if _, ok := s.peers[peer.ID()]; ok {
  2479  		s.trienodeHealIdlers[peer.ID()] = struct{}{}
  2480  	}
  2481  	select {
  2482  	case s.update <- struct{}{}:
  2483  	default:
  2484  	}
  2485  	// Ensure the response is for a valid request
  2486  	req, ok := s.trienodeHealReqs[id]
  2487  	if !ok {
  2488  		// Request stale, perhaps the peer timed out but came through in the end
  2489  		logger.Warn("Unexpected trienode heal packet")
  2490  		s.lock.Unlock()
  2491  		return nil
  2492  	}
  2493  	delete(s.trienodeHealReqs, id)
  2494  
  2495  	// Clean up the request timeout timer, we'll see how to proceed further based
  2496  	// on the actual delivered content
  2497  	if !req.timeout.Stop() {
  2498  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2499  		s.lock.Unlock()
  2500  		return nil
  2501  	}
  2502  
  2503  	// Response is valid, but check if peer is signalling that it does not have
  2504  	// the requested data. For bytecode range queries that means the peer is not
  2505  	// yet synced.
  2506  	if len(trienodes) == 0 {
  2507  		logger.Debug("Peer rejected trienode heal request")
  2508  		s.statelessPeers[peer.ID()] = struct{}{}
  2509  		s.lock.Unlock()
  2510  
  2511  		// Signal this request as failed, and ready for rescheduling
  2512  		s.scheduleRevertTrienodeHealRequest(req)
  2513  		return nil
  2514  	}
  2515  	s.lock.Unlock()
  2516  
  2517  	// Cross reference the requested trienodes with the response to find gaps
  2518  	// that the serving node is missing
  2519  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2520  	hash := make([]byte, 32)
  2521  
  2522  	nodes := make([][]byte, len(req.hashes))
  2523  	for i, j := 0, 0; i < len(trienodes); i++ {
  2524  		// Find the next hash that we've been served, leaving misses with nils
  2525  		hasher.Reset()
  2526  		hasher.Write(trienodes[i])
  2527  		hasher.Read(hash)
  2528  
  2529  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2530  			j++
  2531  		}
  2532  		if j < len(req.hashes) {
  2533  			nodes[j] = trienodes[i]
  2534  			j++
  2535  			continue
  2536  		}
  2537  		// We've either ran out of hashes, or got unrequested data
  2538  		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
  2539  		// Signal this request as failed, and ready for rescheduling
  2540  		s.scheduleRevertTrienodeHealRequest(req)
  2541  		return errors.New("unexpected healing trienode")
  2542  	}
  2543  	// Response validated, send it to the scheduler for filling
  2544  	response := &trienodeHealResponse{
  2545  		task:   req.task,
  2546  		hashes: req.hashes,
  2547  		paths:  req.paths,
  2548  		nodes:  nodes,
  2549  	}
  2550  	select {
  2551  	case req.deliver <- response:
  2552  	case <-req.cancel:
  2553  	case <-req.stale:
  2554  	}
  2555  	return nil
  2556  }
  2557  
  2558  // onHealByteCodes is a callback method to invoke when a batch of contract
  2559  // bytes codes are received from a remote peer in the healing phase.
  2560  func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2561  	var size common.StorageSize
  2562  	for _, code := range bytecodes {
  2563  		size += common.StorageSize(len(code))
  2564  	}
  2565  	logger := peer.Log().New("reqid", id)
  2566  	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2567  
  2568  	// Whether or not the response is valid, we can mark the peer as idle and
  2569  	// notify the scheduler to assign a new task. If the response is invalid,
  2570  	// we'll drop the peer in a bit.
  2571  	s.lock.Lock()
  2572  	if _, ok := s.peers[peer.ID()]; ok {
  2573  		s.bytecodeHealIdlers[peer.ID()] = struct{}{}
  2574  	}
  2575  	select {
  2576  	case s.update <- struct{}{}:
  2577  	default:
  2578  	}
  2579  	// Ensure the response is for a valid request
  2580  	req, ok := s.bytecodeHealReqs[id]
  2581  	if !ok {
  2582  		// Request stale, perhaps the peer timed out but came through in the end
  2583  		logger.Warn("Unexpected bytecode heal packet")
  2584  		s.lock.Unlock()
  2585  		return nil
  2586  	}
  2587  	delete(s.bytecodeHealReqs, id)
  2588  
  2589  	// Clean up the request timeout timer, we'll see how to proceed further based
  2590  	// on the actual delivered content
  2591  	if !req.timeout.Stop() {
  2592  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2593  		s.lock.Unlock()
  2594  		return nil
  2595  	}
  2596  
  2597  	// Response is valid, but check if peer is signalling that it does not have
  2598  	// the requested data. For bytecode range queries that means the peer is not
  2599  	// yet synced.
  2600  	if len(bytecodes) == 0 {
  2601  		logger.Debug("Peer rejected bytecode heal request")
  2602  		s.statelessPeers[peer.ID()] = struct{}{}
  2603  		s.lock.Unlock()
  2604  
  2605  		// Signal this request as failed, and ready for rescheduling
  2606  		s.scheduleRevertBytecodeHealRequest(req)
  2607  		return nil
  2608  	}
  2609  	s.lock.Unlock()
  2610  
  2611  	// Cross reference the requested bytecodes with the response to find gaps
  2612  	// that the serving node is missing
  2613  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2614  	hash := make([]byte, 32)
  2615  
  2616  	codes := make([][]byte, len(req.hashes))
  2617  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2618  		// Find the next hash that we've been served, leaving misses with nils
  2619  		hasher.Reset()
  2620  		hasher.Write(bytecodes[i])
  2621  		hasher.Read(hash)
  2622  
  2623  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2624  			j++
  2625  		}
  2626  		if j < len(req.hashes) {
  2627  			codes[j] = bytecodes[i]
  2628  			j++
  2629  			continue
  2630  		}
  2631  		// We've either ran out of hashes, or got unrequested data
  2632  		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
  2633  		// Signal this request as failed, and ready for rescheduling
  2634  		s.scheduleRevertBytecodeHealRequest(req)
  2635  		return errors.New("unexpected healing bytecode")
  2636  	}
  2637  	// Response validated, send it to the scheduler for filling
  2638  	response := &bytecodeHealResponse{
  2639  		task:   req.task,
  2640  		hashes: req.hashes,
  2641  		codes:  codes,
  2642  	}
  2643  	select {
  2644  	case req.deliver <- response:
  2645  	case <-req.cancel:
  2646  	case <-req.stale:
  2647  	}
  2648  	return nil
  2649  }
  2650  
  2651  // onHealState is a callback method to invoke when a flat state(account
  2652  // or storage slot) is downloded during the healing stage. The flat states
  2653  // can be persisted blindly and can be fixed later in the generation stage.
  2654  // Note it's not concurrent safe, please handle the concurrent issue outside.
  2655  func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
  2656  	if len(paths) == 1 {
  2657  		var account state.Account
  2658  		if err := rlp.DecodeBytes(value, &account); err != nil {
  2659  			return nil
  2660  		}
  2661  		blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash)
  2662  		rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob)
  2663  		s.accountHealed += 1
  2664  		s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob))
  2665  	}
  2666  	if len(paths) == 2 {
  2667  		rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
  2668  		s.storageHealed += 1
  2669  		s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
  2670  	}
  2671  	if s.stateWriter.ValueSize() > ethdb.IdealBatchSize {
  2672  		s.stateWriter.Write() // It's fine to ignore the error here
  2673  		s.stateWriter.Reset()
  2674  	}
  2675  	return nil
  2676  }
  2677  
  2678  // hashSpace is the total size of the 256 bit hash space for accounts.
  2679  var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
  2680  
  2681  // report calculates various status reports and provides it to the user.
  2682  func (s *Syncer) report(force bool) {
  2683  	if len(s.tasks) > 0 {
  2684  		s.reportSyncProgress(force)
  2685  		return
  2686  	}
  2687  	s.reportHealProgress(force)
  2688  }
  2689  
  2690  // reportSyncProgress calculates various status reports and provides it to the user.
  2691  func (s *Syncer) reportSyncProgress(force bool) {
  2692  	// Don't report all the events, just occasionally
  2693  	if !force && time.Since(s.logTime) < 8*time.Second {
  2694  		return
  2695  	}
  2696  	// Don't report anything until we have a meaningful progress
  2697  	synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
  2698  	if synced == 0 {
  2699  		return
  2700  	}
  2701  	accountGaps := new(big.Int)
  2702  	for _, task := range s.tasks {
  2703  		accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
  2704  	}
  2705  	accountFills := new(big.Int).Sub(hashSpace, accountGaps)
  2706  	if accountFills.BitLen() == 0 {
  2707  		return
  2708  	}
  2709  	s.logTime = time.Now()
  2710  	estBytes := float64(new(big.Int).Div(
  2711  		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
  2712  		accountFills,
  2713  	).Uint64())
  2714  
  2715  	elapsed := time.Since(s.startTime)
  2716  	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
  2717  
  2718  	// Create a mega progress report
  2719  	var (
  2720  		progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
  2721  		accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountSynced), s.accountBytes.TerminalString())
  2722  		storage  = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageSynced), s.storageBytes.TerminalString())
  2723  		bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeSynced), s.bytecodeBytes.TerminalString())
  2724  	)
  2725  	log.Info("State sync in progress", "synced", progress, "state", synced,
  2726  		"accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
  2727  }
  2728  
  2729  // reportHealProgress calculates various status reports and provides it to the user.
  2730  func (s *Syncer) reportHealProgress(force bool) {
  2731  	// Don't report all the events, just occasionally
  2732  	if !force && time.Since(s.logTime) < 8*time.Second {
  2733  		return
  2734  	}
  2735  	s.logTime = time.Now()
  2736  
  2737  	// Create a mega progress report
  2738  	var (
  2739  		trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString())
  2740  		bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString())
  2741  		accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString())
  2742  		storage  = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString())
  2743  	)
  2744  	log.Info("State heal in progress", "accounts", accounts, "slots", storage,
  2745  		"codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
  2746  }
  2747  
  2748  // estimateRemainingSlots tries to determine roughly how many slots are left in
  2749  // a contract storage, based on the number of keys and the last hash. This method
  2750  // assumes that the hashes are lexicographically ordered and evenly distributed.
  2751  func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
  2752  	if last == (common.Hash{}) {
  2753  		return 0, errors.New("last hash empty")
  2754  	}
  2755  	space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
  2756  	space.Div(space, last.Big())
  2757  	if !space.IsUint64() {
  2758  		// Gigantic address space probably due to too few or malicious slots
  2759  		return 0, errors.New("too few slots for estimation")
  2760  	}
  2761  	return space.Uint64() - uint64(hashes), nil
  2762  }