github.com/juliankolbe/go-ethereum@v1.9.992/eth/protocols/snap/sync.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package snap
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"math/big"
    25  	"math/rand"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/juliankolbe/go-ethereum/common"
    30  	"github.com/juliankolbe/go-ethereum/core/rawdb"
    31  	"github.com/juliankolbe/go-ethereum/core/state"
    32  	"github.com/juliankolbe/go-ethereum/crypto"
    33  	"github.com/juliankolbe/go-ethereum/ethdb"
    34  	"github.com/juliankolbe/go-ethereum/event"
    35  	"github.com/juliankolbe/go-ethereum/light"
    36  	"github.com/juliankolbe/go-ethereum/log"
    37  	"github.com/juliankolbe/go-ethereum/rlp"
    38  	"github.com/juliankolbe/go-ethereum/trie"
    39  	"golang.org/x/crypto/sha3"
    40  )
    41  
    42  var (
    43  	// emptyRoot is the known root hash of an empty trie.
    44  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    45  
    46  	// emptyCode is the known hash of the empty EVM bytecode.
    47  	emptyCode = crypto.Keccak256Hash(nil)
    48  )
    49  
    50  const (
    51  	// maxRequestSize is the maximum number of bytes to request from a remote peer.
    52  	maxRequestSize = 512 * 1024
    53  
    54  	// maxStorageSetRequestCountis th maximum number of contracts to request the
    55  	// storage of in a single query. If this number is too low, we're not filling
    56  	// responses fully and waste round trip times. If it's too high, we're capping
    57  	// responses and waste bandwidth.
    58  	maxStorageSetRequestCount = maxRequestSize / 1024
    59  
    60  	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
    61  	// single query. If this number is too low, we're not filling responses fully
    62  	// and waste round trip times. If it's too high, we're capping responses and
    63  	// waste bandwidth.
    64  	//
    65  	// Depoyed bytecodes are currently capped at 24KB, so the minimum request
    66  	// size should be maxRequestSize / 24K. Assuming that most contracts do not
    67  	// come close to that, requesting 4x should be a good approximation.
    68  	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4
    69  
    70  	// maxTrieRequestCount is the maximum number of trie node blobs to request in
    71  	// a single query. If this number is too low, we're not filling responses fully
    72  	// and waste round trip times. If it's too high, we're capping responses and
    73  	// waste bandwidth.
    74  	maxTrieRequestCount = 512
    75  
    76  	// accountConcurrency is the number of chunks to split the account trie into
    77  	// to allow concurrent retrievals.
    78  	accountConcurrency = 16
    79  
    80  	// storageConcurrency is the number of chunks to split the a large contract
    81  	// storage trie into to allow concurrent retrievals.
    82  	storageConcurrency = 16
    83  )
    84  
    85  var (
    86  	// requestTimeout is the maximum time a peer is allowed to spend on serving
    87  	// a single network request.
    88  	requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
    89  )
    90  
    91  // ErrCancelled is returned from snap syncing if the operation was prematurely
    92  // terminated.
    93  var ErrCancelled = errors.New("sync cancelled")
    94  
    95  // accountRequest tracks a pending account range request to ensure responses are
    96  // to actual requests and to validate any security constraints.
    97  //
    98  // Concurrency note: account requests and responses are handled concurrently from
    99  // the main runloop to allow Merkle proof verifications on the peer's thread and
   100  // to drop on invalid response. The request struct must contain all the data to
   101  // construct the response without accessing runloop internals (i.e. task). That
   102  // is only included to allow the runloop to match a response to the task being
   103  // synced without having yet another set of maps.
   104  type accountRequest struct {
   105  	peer string // Peer to which this request is assigned
   106  	id   uint64 // Request ID of this request
   107  
   108  	cancel  chan struct{} // Channel to track sync cancellation
   109  	timeout *time.Timer   // Timer to track delivery timeout
   110  	stale   chan struct{} // Channel to signal the request was dropped
   111  
   112  	origin common.Hash // First account requested to allow continuation checks
   113  	limit  common.Hash // Last account requested to allow non-overlapping chunking
   114  
   115  	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
   116  }
   117  
   118  // accountResponse is an already Merkle-verified remote response to an account
   119  // range request. It contains the subtrie for the requested account range and
   120  // the database that's going to be filled with the internal nodes on commit.
   121  type accountResponse struct {
   122  	task *accountTask // Task which this request is filling
   123  
   124  	hashes   []common.Hash    // Account hashes in the returned range
   125  	accounts []*state.Account // Expanded accounts in the returned range
   126  
   127  	nodes ethdb.KeyValueStore // Database containing the reconstructed trie nodes
   128  	trie  *trie.Trie          // Reconstructed trie to reject incomplete account paths
   129  
   130  	bounds   map[common.Hash]struct{} // Boundary nodes to avoid persisting incomplete accounts
   131  	overflow *light.NodeSet           // Overflow nodes to avoid persisting across chunk boundaries
   132  
   133  	cont bool // Whether the account range has a continuation
   134  }
   135  
   136  // bytecodeRequest tracks a pending bytecode request to ensure responses are to
   137  // actual requests and to validate any security constraints.
   138  //
   139  // Concurrency note: bytecode requests and responses are handled concurrently from
   140  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   141  // to drop on invalid response. The request struct must contain all the data to
   142  // construct the response without accessing runloop internals (i.e. task). That
   143  // is only included to allow the runloop to match a response to the task being
   144  // synced without having yet another set of maps.
   145  type bytecodeRequest struct {
   146  	peer string // Peer to which this request is assigned
   147  	id   uint64 // Request ID of this request
   148  
   149  	cancel  chan struct{} // Channel to track sync cancellation
   150  	timeout *time.Timer   // Timer to track delivery timeout
   151  	stale   chan struct{} // Channel to signal the request was dropped
   152  
   153  	hashes []common.Hash // Bytecode hashes to validate responses
   154  	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
   155  }
   156  
   157  // bytecodeResponse is an already verified remote response to a bytecode request.
   158  type bytecodeResponse struct {
   159  	task *accountTask // Task which this request is filling
   160  
   161  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   162  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   163  }
   164  
   165  // storageRequest tracks a pending storage ranges request to ensure responses are
   166  // to actual requests and to validate any security constraints.
   167  //
   168  // Concurrency note: storage requests and responses are handled concurrently from
   169  // the main runloop to allow Merkel proof verifications on the peer's thread and
   170  // to drop on invalid response. The request struct must contain all the data to
   171  // construct the response without accessing runloop internals (i.e. tasks). That
   172  // is only included to allow the runloop to match a response to the task being
   173  // synced without having yet another set of maps.
   174  type storageRequest struct {
   175  	peer string // Peer to which this request is assigned
   176  	id   uint64 // Request ID of this request
   177  
   178  	cancel  chan struct{} // Channel to track sync cancellation
   179  	timeout *time.Timer   // Timer to track delivery timeout
   180  	stale   chan struct{} // Channel to signal the request was dropped
   181  
   182  	accounts []common.Hash // Account hashes to validate responses
   183  	roots    []common.Hash // Storage roots to validate responses
   184  
   185  	origin common.Hash // First storage slot requested to allow continuation checks
   186  	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking
   187  
   188  	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
   189  	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
   190  }
   191  
   192  // storageResponse is an already Merkle-verified remote response to a storage
   193  // range request. It contains the subtries for the requested storage ranges and
   194  // the databases that's going to be filled with the internal nodes on commit.
   195  type storageResponse struct {
   196  	mainTask *accountTask // Task which this response belongs to
   197  	subTask  *storageTask // Task which this response is filling
   198  
   199  	accounts []common.Hash // Account hashes requested, may be only partially filled
   200  	roots    []common.Hash // Storage roots requested, may be only partially filled
   201  
   202  	hashes [][]common.Hash       // Storage slot hashes in the returned range
   203  	slots  [][][]byte            // Storage slot values in the returned range
   204  	nodes  []ethdb.KeyValueStore // Database containing the reconstructed trie nodes
   205  	tries  []*trie.Trie          // Reconstructed tries to reject overflown slots
   206  
   207  	// Fields relevant for the last account only
   208  	bounds   map[common.Hash]struct{} // Boundary nodes to avoid persisting (incomplete)
   209  	overflow *light.NodeSet           // Overflow nodes to avoid persisting across chunk boundaries
   210  	cont     bool                     // Whether the last storage range has a continuation
   211  }
   212  
   213  // trienodeHealRequest tracks a pending state trie request to ensure responses
   214  // are to actual requests and to validate any security constraints.
   215  //
   216  // Concurrency note: trie node requests and responses are handled concurrently from
   217  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   218  // to drop on invalid response. The request struct must contain all the data to
   219  // construct the response without accessing runloop internals (i.e. task). That
   220  // is only included to allow the runloop to match a response to the task being
   221  // synced without having yet another set of maps.
   222  type trienodeHealRequest struct {
   223  	peer string // Peer to which this request is assigned
   224  	id   uint64 // Request ID of this request
   225  
   226  	cancel  chan struct{} // Channel to track sync cancellation
   227  	timeout *time.Timer   // Timer to track delivery timeout
   228  	stale   chan struct{} // Channel to signal the request was dropped
   229  
   230  	hashes []common.Hash   // Trie node hashes to validate responses
   231  	paths  []trie.SyncPath // Trie node paths requested for rescheduling
   232  
   233  	task *healTask // Task which this request is filling (only access fields through the runloop!!)
   234  }
   235  
   236  // trienodeHealResponse is an already verified remote response to a trie node request.
   237  type trienodeHealResponse struct {
   238  	task *healTask // Task which this request is filling
   239  
   240  	hashes []common.Hash   // Hashes of the trie nodes to avoid double hashing
   241  	paths  []trie.SyncPath // Trie node paths requested for rescheduling missing ones
   242  	nodes  [][]byte        // Actual trie nodes to store into the database (nil = missing)
   243  }
   244  
   245  // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
   246  // actual requests and to validate any security constraints.
   247  //
   248  // Concurrency note: bytecode requests and responses are handled concurrently from
   249  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   250  // to drop on invalid response. The request struct must contain all the data to
   251  // construct the response without accessing runloop internals (i.e. task). That
   252  // is only included to allow the runloop to match a response to the task being
   253  // synced without having yet another set of maps.
   254  type bytecodeHealRequest struct {
   255  	peer string // Peer to which this request is assigned
   256  	id   uint64 // Request ID of this request
   257  
   258  	cancel  chan struct{} // Channel to track sync cancellation
   259  	timeout *time.Timer   // Timer to track delivery timeout
   260  	stale   chan struct{} // Channel to signal the request was dropped
   261  
   262  	hashes []common.Hash // Bytecode hashes to validate responses
   263  	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
   264  }
   265  
   266  // bytecodeHealResponse is an already verified remote response to a bytecode request.
   267  type bytecodeHealResponse struct {
   268  	task *healTask // Task which this request is filling
   269  
   270  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   271  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   272  }
   273  
   274  // accountTask represents the sync task for a chunk of the account snapshot.
   275  type accountTask struct {
   276  	// These fields get serialized to leveldb on shutdown
   277  	Next     common.Hash                    // Next account to sync in this interval
   278  	Last     common.Hash                    // Last account to sync in this interval
   279  	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
   280  
   281  	// These fields are internals used during runtime
   282  	req  *accountRequest  // Pending request to fill this task
   283  	res  *accountResponse // Validate response filling this task
   284  	pend int              // Number of pending subtasks for this round
   285  
   286  	needCode  []bool // Flags whether the filling accounts need code retrieval
   287  	needState []bool // Flags whether the filling accounts need storage retrieval
   288  	needHeal  []bool // Flags whether the filling accounts's state was chunked and need healing
   289  
   290  	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
   291  	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
   292  
   293  	done bool // Flag whether the task can be removed
   294  }
   295  
   296  // storageTask represents the sync task for a chunk of the storage snapshot.
   297  type storageTask struct {
   298  	Next common.Hash // Next account to sync in this interval
   299  	Last common.Hash // Last account to sync in this interval
   300  
   301  	// These fields are internals used during runtime
   302  	root common.Hash     // Storage root hash for this instance
   303  	req  *storageRequest // Pending request to fill this task
   304  	done bool            // Flag whether the task can be removed
   305  }
   306  
   307  // healTask represents the sync task for healing the snap-synced chunk boundaries.
   308  type healTask struct {
   309  	scheduler *trie.Sync // State trie sync scheduler defining the tasks
   310  
   311  	trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval
   312  	codeTasks map[common.Hash]struct{}      // Set of byte code tasks currently queued for retrieval
   313  }
   314  
   315  // syncProgress is a database entry to allow suspending and resuming a snapshot state
   316  // sync. Opposed to full and fast sync, there is no way to restart a suspended
   317  // snap sync without prior knowledge of the suspension point.
   318  type syncProgress struct {
   319  	Tasks []*accountTask // The suspended account tasks (contract tasks within)
   320  
   321  	// Status report during syncing phase
   322  	AccountSynced  uint64             // Number of accounts downloaded
   323  	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   324  	BytecodeSynced uint64             // Number of bytecodes downloaded
   325  	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   326  	StorageSynced  uint64             // Number of storage slots downloaded
   327  	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   328  
   329  	// Status report during healing phase
   330  	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
   331  	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   332  	TrienodeHealDups   uint64             // Number of state trie nodes already processed
   333  	TrienodeHealNops   uint64             // Number of state trie nodes not requested
   334  	BytecodeHealSynced uint64             // Number of bytecodes downloaded
   335  	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   336  	BytecodeHealDups   uint64             // Number of bytecodes already processed
   337  	BytecodeHealNops   uint64             // Number of bytecodes not requested
   338  }
   339  
   340  // SyncPeer abstracts out the methods required for a peer to be synced against
   341  // with the goal of allowing the construction of mock peers without the full
   342  // blown networking.
   343  type SyncPeer interface {
   344  	// ID retrieves the peer's unique identifier.
   345  	ID() string
   346  
   347  	// RequestAccountRange fetches a batch of accounts rooted in a specific account
   348  	// trie, starting with the origin.
   349  	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
   350  
   351  	// RequestStorageRange fetches a batch of storage slots belonging to one or
   352  	// more accounts. If slots from only one accout is requested, an origin marker
   353  	// may also be used to retrieve from there.
   354  	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
   355  
   356  	// RequestByteCodes fetches a batch of bytecodes by hash.
   357  	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
   358  
   359  	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
   360  	// a specificstate trie.
   361  	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
   362  
   363  	// Log retrieves the peer's own contextual logger.
   364  	Log() log.Logger
   365  }
   366  
   367  // Syncer is an Ethereum account and storage trie syncer based on snapshots and
   368  // the  snap protocol. It's purpose is to download all the accounts and storage
   369  // slots from remote peers and reassemble chunks of the state trie, on top of
   370  // which a state sync can be run to fix any gaps / overlaps.
   371  //
   372  // Every network request has a variety of failure events:
   373  //   - The peer disconnects after task assignment, failing to send the request
   374  //   - The peer disconnects after sending the request, before delivering on it
   375  //   - The peer remains connected, but does not deliver a response in time
   376  //   - The peer delivers a stale response after a previous timeout
   377  //   - The peer delivers a refusal to serve the requested state
   378  type Syncer struct {
   379  	db    ethdb.KeyValueStore // Database to store the trie nodes into (and dedup)
   380  	bloom *trie.SyncBloom     // Bloom filter to deduplicate nodes for state fixup
   381  
   382  	root    common.Hash    // Current state trie root being synced
   383  	tasks   []*accountTask // Current account task set being synced
   384  	snapped bool           // Flag to signal that snap phase is done
   385  	healer  *healTask      // Current state healing task being executed
   386  	update  chan struct{}  // Notification channel for possible sync progression
   387  
   388  	peers    map[string]SyncPeer // Currently active peers to download from
   389  	peerJoin *event.Feed         // Event feed to react to peers joining
   390  	peerDrop *event.Feed         // Event feed to react to peers dropping
   391  
   392  	// Request tracking during syncing phase
   393  	statelessPeers map[string]struct{} // Peers that failed to deliver state data
   394  	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
   395  	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   396  	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests
   397  
   398  	accountReqs  map[uint64]*accountRequest  // Account requests currently running
   399  	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
   400  	storageReqs  map[uint64]*storageRequest  // Storage requests currently running
   401  
   402  	accountReqFails  chan *accountRequest  // Failed account range requests to revert
   403  	bytecodeReqFails chan *bytecodeRequest // Failed bytecode requests to revert
   404  	storageReqFails  chan *storageRequest  // Failed storage requests to revert
   405  
   406  	accountResps  chan *accountResponse  // Account sub-tries to integrate into the database
   407  	bytecodeResps chan *bytecodeResponse // Bytecodes to integrate into the database
   408  	storageResps  chan *storageResponse  // Storage sub-tries to integrate into the database
   409  
   410  	accountSynced  uint64             // Number of accounts downloaded
   411  	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   412  	bytecodeSynced uint64             // Number of bytecodes downloaded
   413  	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   414  	storageSynced  uint64             // Number of storage slots downloaded
   415  	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   416  
   417  	// Request tracking during healing phase
   418  	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
   419  	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   420  
   421  	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
   422  	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
   423  
   424  	trienodeHealReqFails chan *trienodeHealRequest // Failed trienode requests to revert
   425  	bytecodeHealReqFails chan *bytecodeHealRequest // Failed bytecode requests to revert
   426  
   427  	trienodeHealResps chan *trienodeHealResponse // Trie nodes to integrate into the database
   428  	bytecodeHealResps chan *bytecodeHealResponse // Bytecodes to integrate into the database
   429  
   430  	trienodeHealSynced uint64             // Number of state trie nodes downloaded
   431  	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   432  	trienodeHealDups   uint64             // Number of state trie nodes already processed
   433  	trienodeHealNops   uint64             // Number of state trie nodes not requested
   434  	bytecodeHealSynced uint64             // Number of bytecodes downloaded
   435  	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   436  	bytecodeHealDups   uint64             // Number of bytecodes already processed
   437  	bytecodeHealNops   uint64             // Number of bytecodes not requested
   438  
   439  	startTime time.Time   // Time instance when snapshot sync started
   440  	startAcc  common.Hash // Account hash where sync started from
   441  	logTime   time.Time   // Time instance when status was last reported
   442  
   443  	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
   444  	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
   445  }
   446  
   447  // NewSyncer creates a new snapshot syncer to download the Ethereum state over the
   448  // snap protocol.
   449  func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer {
   450  	return &Syncer{
   451  		db:    db,
   452  		bloom: bloom,
   453  
   454  		peers:    make(map[string]SyncPeer),
   455  		peerJoin: new(event.Feed),
   456  		peerDrop: new(event.Feed),
   457  		update:   make(chan struct{}, 1),
   458  
   459  		accountIdlers:  make(map[string]struct{}),
   460  		storageIdlers:  make(map[string]struct{}),
   461  		bytecodeIdlers: make(map[string]struct{}),
   462  
   463  		accountReqs:      make(map[uint64]*accountRequest),
   464  		storageReqs:      make(map[uint64]*storageRequest),
   465  		bytecodeReqs:     make(map[uint64]*bytecodeRequest),
   466  		accountReqFails:  make(chan *accountRequest),
   467  		storageReqFails:  make(chan *storageRequest),
   468  		bytecodeReqFails: make(chan *bytecodeRequest),
   469  		accountResps:     make(chan *accountResponse),
   470  		storageResps:     make(chan *storageResponse),
   471  		bytecodeResps:    make(chan *bytecodeResponse),
   472  
   473  		trienodeHealIdlers: make(map[string]struct{}),
   474  		bytecodeHealIdlers: make(map[string]struct{}),
   475  
   476  		trienodeHealReqs:     make(map[uint64]*trienodeHealRequest),
   477  		bytecodeHealReqs:     make(map[uint64]*bytecodeHealRequest),
   478  		trienodeHealReqFails: make(chan *trienodeHealRequest),
   479  		bytecodeHealReqFails: make(chan *bytecodeHealRequest),
   480  		trienodeHealResps:    make(chan *trienodeHealResponse),
   481  		bytecodeHealResps:    make(chan *bytecodeHealResponse),
   482  	}
   483  }
   484  
   485  // Register injects a new data source into the syncer's peerset.
   486  func (s *Syncer) Register(peer SyncPeer) error {
   487  	// Make sure the peer is not registered yet
   488  	id := peer.ID()
   489  
   490  	s.lock.Lock()
   491  	if _, ok := s.peers[id]; ok {
   492  		log.Error("Snap peer already registered", "id", id)
   493  
   494  		s.lock.Unlock()
   495  		return errors.New("already registered")
   496  	}
   497  	s.peers[id] = peer
   498  
   499  	// Mark the peer as idle, even if no sync is running
   500  	s.accountIdlers[id] = struct{}{}
   501  	s.storageIdlers[id] = struct{}{}
   502  	s.bytecodeIdlers[id] = struct{}{}
   503  	s.trienodeHealIdlers[id] = struct{}{}
   504  	s.bytecodeHealIdlers[id] = struct{}{}
   505  	s.lock.Unlock()
   506  
   507  	// Notify any active syncs that a new peer can be assigned data
   508  	s.peerJoin.Send(id)
   509  	return nil
   510  }
   511  
   512  // Unregister injects a new data source into the syncer's peerset.
   513  func (s *Syncer) Unregister(id string) error {
   514  	// Remove all traces of the peer from the registry
   515  	s.lock.Lock()
   516  	if _, ok := s.peers[id]; !ok {
   517  		log.Error("Snap peer not registered", "id", id)
   518  
   519  		s.lock.Unlock()
   520  		return errors.New("not registered")
   521  	}
   522  	delete(s.peers, id)
   523  
   524  	// Remove status markers, even if no sync is running
   525  	delete(s.statelessPeers, id)
   526  
   527  	delete(s.accountIdlers, id)
   528  	delete(s.storageIdlers, id)
   529  	delete(s.bytecodeIdlers, id)
   530  	delete(s.trienodeHealIdlers, id)
   531  	delete(s.bytecodeHealIdlers, id)
   532  	s.lock.Unlock()
   533  
   534  	// Notify any active syncs that pending requests need to be reverted
   535  	s.peerDrop.Send(id)
   536  	return nil
   537  }
   538  
   539  // Sync starts (or resumes a previous) sync cycle to iterate over an state trie
   540  // with the given root and reconstruct the nodes based on the snapshot leaves.
   541  // Previously downloaded segments will not be redownloaded of fixed, rather any
   542  // errors will be healed after the leaves are fully accumulated.
   543  func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
   544  	// Move the trie root from any previous value, revert stateless markers for
   545  	// any peers and initialize the syncer if it was not yet run
   546  	s.lock.Lock()
   547  	s.root = root
   548  	s.healer = &healTask{
   549  		scheduler: state.NewStateSync(root, s.db, s.bloom),
   550  		trieTasks: make(map[common.Hash]trie.SyncPath),
   551  		codeTasks: make(map[common.Hash]struct{}),
   552  	}
   553  	s.statelessPeers = make(map[string]struct{})
   554  	s.lock.Unlock()
   555  
   556  	if s.startTime == (time.Time{}) {
   557  		s.startTime = time.Now()
   558  	}
   559  	// Retrieve the previous sync status from LevelDB and abort if already synced
   560  	s.loadSyncStatus()
   561  	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   562  		log.Debug("Snapshot sync already completed")
   563  		return nil
   564  	}
   565  	defer func() { // Persist any progress, independent of failure
   566  		for _, task := range s.tasks {
   567  			s.forwardAccountTask(task)
   568  		}
   569  		s.cleanAccountTasks()
   570  		s.saveSyncStatus()
   571  	}()
   572  
   573  	log.Debug("Starting snapshot sync cycle", "root", root)
   574  	defer s.report(true)
   575  
   576  	// Whether sync completed or not, disregard any future packets
   577  	defer func() {
   578  		log.Debug("Terminating snapshot sync cycle", "root", root)
   579  		s.lock.Lock()
   580  		s.accountReqs = make(map[uint64]*accountRequest)
   581  		s.storageReqs = make(map[uint64]*storageRequest)
   582  		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
   583  		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
   584  		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
   585  		s.lock.Unlock()
   586  	}()
   587  	// Keep scheduling sync tasks
   588  	peerJoin := make(chan string, 16)
   589  	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
   590  	defer peerJoinSub.Unsubscribe()
   591  
   592  	peerDrop := make(chan string, 16)
   593  	peerDropSub := s.peerDrop.Subscribe(peerDrop)
   594  	defer peerDropSub.Unsubscribe()
   595  
   596  	for {
   597  		// Remove all completed tasks and terminate sync if everything's done
   598  		s.cleanStorageTasks()
   599  		s.cleanAccountTasks()
   600  		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   601  			return nil
   602  		}
   603  		// Assign all the data retrieval tasks to any free peers
   604  		s.assignAccountTasks(cancel)
   605  		s.assignBytecodeTasks(cancel)
   606  		s.assignStorageTasks(cancel)
   607  
   608  		if len(s.tasks) == 0 {
   609  			// Sync phase done, run heal phase
   610  			s.assignTrienodeHealTasks(cancel)
   611  			s.assignBytecodeHealTasks(cancel)
   612  		}
   613  		// Wait for something to happen
   614  		select {
   615  		case <-s.update:
   616  			// Something happened (new peer, delivery, timeout), recheck tasks
   617  		case <-peerJoin:
   618  			// A new peer joined, try to schedule it new tasks
   619  		case id := <-peerDrop:
   620  			s.revertRequests(id)
   621  		case <-cancel:
   622  			return ErrCancelled
   623  
   624  		case req := <-s.accountReqFails:
   625  			s.revertAccountRequest(req)
   626  		case req := <-s.bytecodeReqFails:
   627  			s.revertBytecodeRequest(req)
   628  		case req := <-s.storageReqFails:
   629  			s.revertStorageRequest(req)
   630  		case req := <-s.trienodeHealReqFails:
   631  			s.revertTrienodeHealRequest(req)
   632  		case req := <-s.bytecodeHealReqFails:
   633  			s.revertBytecodeHealRequest(req)
   634  
   635  		case res := <-s.accountResps:
   636  			s.processAccountResponse(res)
   637  		case res := <-s.bytecodeResps:
   638  			s.processBytecodeResponse(res)
   639  		case res := <-s.storageResps:
   640  			s.processStorageResponse(res)
   641  		case res := <-s.trienodeHealResps:
   642  			s.processTrienodeHealResponse(res)
   643  		case res := <-s.bytecodeHealResps:
   644  			s.processBytecodeHealResponse(res)
   645  		}
   646  		// Report stats if something meaningful happened
   647  		s.report(false)
   648  	}
   649  }
   650  
   651  // loadSyncStatus retrieves a previously aborted sync status from the database,
   652  // or generates a fresh one if none is available.
   653  func (s *Syncer) loadSyncStatus() {
   654  	var progress syncProgress
   655  
   656  	if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil {
   657  		if err := json.Unmarshal(status, &progress); err != nil {
   658  			log.Error("Failed to decode snap sync status", "err", err)
   659  		} else {
   660  			for _, task := range progress.Tasks {
   661  				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
   662  			}
   663  			s.tasks = progress.Tasks
   664  			s.snapped = len(s.tasks) == 0
   665  
   666  			s.accountSynced = progress.AccountSynced
   667  			s.accountBytes = progress.AccountBytes
   668  			s.bytecodeSynced = progress.BytecodeSynced
   669  			s.bytecodeBytes = progress.BytecodeBytes
   670  			s.storageSynced = progress.StorageSynced
   671  			s.storageBytes = progress.StorageBytes
   672  
   673  			s.trienodeHealSynced = progress.TrienodeHealSynced
   674  			s.trienodeHealBytes = progress.TrienodeHealBytes
   675  			s.bytecodeHealSynced = progress.BytecodeHealSynced
   676  			s.bytecodeHealBytes = progress.BytecodeHealBytes
   677  			return
   678  		}
   679  	}
   680  	// Either we've failed to decode the previus state, or there was none.
   681  	// Start a fresh sync by chunking up the account range and scheduling
   682  	// them for retrieval.
   683  	s.tasks = nil
   684  	s.accountSynced, s.accountBytes = 0, 0
   685  	s.bytecodeSynced, s.bytecodeBytes = 0, 0
   686  	s.storageSynced, s.storageBytes = 0, 0
   687  	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
   688  	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0
   689  
   690  	var next common.Hash
   691  	step := new(big.Int).Sub(
   692  		new(big.Int).Div(
   693  			new(big.Int).Exp(common.Big2, common.Big256, nil),
   694  			big.NewInt(accountConcurrency),
   695  		), common.Big1,
   696  	)
   697  	for i := 0; i < accountConcurrency; i++ {
   698  		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
   699  		if i == accountConcurrency-1 {
   700  			// Make sure we don't overflow if the step is not a proper divisor
   701  			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
   702  		}
   703  		s.tasks = append(s.tasks, &accountTask{
   704  			Next:     next,
   705  			Last:     last,
   706  			SubTasks: make(map[common.Hash][]*storageTask),
   707  		})
   708  		log.Debug("Created account sync task", "from", next, "last", last)
   709  		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
   710  	}
   711  }
   712  
   713  // saveSyncStatus marshals the remaining sync tasks into leveldb.
   714  func (s *Syncer) saveSyncStatus() {
   715  	progress := &syncProgress{
   716  		Tasks:              s.tasks,
   717  		AccountSynced:      s.accountSynced,
   718  		AccountBytes:       s.accountBytes,
   719  		BytecodeSynced:     s.bytecodeSynced,
   720  		BytecodeBytes:      s.bytecodeBytes,
   721  		StorageSynced:      s.storageSynced,
   722  		StorageBytes:       s.storageBytes,
   723  		TrienodeHealSynced: s.trienodeHealSynced,
   724  		TrienodeHealBytes:  s.trienodeHealBytes,
   725  		BytecodeHealSynced: s.bytecodeHealSynced,
   726  		BytecodeHealBytes:  s.bytecodeHealBytes,
   727  	}
   728  	status, err := json.Marshal(progress)
   729  	if err != nil {
   730  		panic(err) // This can only fail during implementation
   731  	}
   732  	rawdb.WriteSnapshotSyncStatus(s.db, status)
   733  }
   734  
   735  // cleanAccountTasks removes account range retrieval tasks that have already been
   736  // completed.
   737  func (s *Syncer) cleanAccountTasks() {
   738  	for i := 0; i < len(s.tasks); i++ {
   739  		if s.tasks[i].done {
   740  			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
   741  			i--
   742  		}
   743  	}
   744  	if len(s.tasks) == 0 {
   745  		s.lock.Lock()
   746  		s.snapped = true
   747  		s.lock.Unlock()
   748  	}
   749  }
   750  
   751  // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
   752  // within, cleaning any that have been completed.
   753  func (s *Syncer) cleanStorageTasks() {
   754  	for _, task := range s.tasks {
   755  		for account, subtasks := range task.SubTasks {
   756  			// Remove storage range retrieval tasks that completed
   757  			for j := 0; j < len(subtasks); j++ {
   758  				if subtasks[j].done {
   759  					subtasks = append(subtasks[:j], subtasks[j+1:]...)
   760  					j--
   761  				}
   762  			}
   763  			if len(subtasks) > 0 {
   764  				task.SubTasks[account] = subtasks
   765  				continue
   766  			}
   767  			// If all storage chunks are done, mark the account as done too
   768  			for j, hash := range task.res.hashes {
   769  				if hash == account {
   770  					task.needState[j] = false
   771  				}
   772  			}
   773  			delete(task.SubTasks, account)
   774  			task.pend--
   775  
   776  			// If this was the last pending task, forward the account task
   777  			if task.pend == 0 {
   778  				s.forwardAccountTask(task)
   779  			}
   780  		}
   781  	}
   782  }
   783  
   784  // assignAccountTasks attempts to match idle peers to pending account range
   785  // retrievals.
   786  func (s *Syncer) assignAccountTasks(cancel chan struct{}) {
   787  	s.lock.Lock()
   788  	defer s.lock.Unlock()
   789  
   790  	// If there are no idle peers, short circuit assignment
   791  	if len(s.accountIdlers) == 0 {
   792  		return
   793  	}
   794  	// Iterate over all the tasks and try to find a pending one
   795  	for _, task := range s.tasks {
   796  		// Skip any tasks already filling
   797  		if task.req != nil || task.res != nil {
   798  			continue
   799  		}
   800  		// Task pending retrieval, try to find an idle peer. If no such peer
   801  		// exists, we probably assigned tasks for all (or they are stateless).
   802  		// Abort the entire assignment mechanism.
   803  		var idle string
   804  		for id := range s.accountIdlers {
   805  			// If the peer rejected a query in this sync cycle, don't bother asking
   806  			// again for anything, it's either out of sync or already pruned
   807  			if _, ok := s.statelessPeers[id]; ok {
   808  				continue
   809  			}
   810  			idle = id
   811  			break
   812  		}
   813  		if idle == "" {
   814  			return
   815  		}
   816  		// Matched a pending task to an idle peer, allocate a unique request id
   817  		var reqid uint64
   818  		for {
   819  			reqid = uint64(rand.Int63())
   820  			if reqid == 0 {
   821  				continue
   822  			}
   823  			if _, ok := s.accountReqs[reqid]; ok {
   824  				continue
   825  			}
   826  			break
   827  		}
   828  		// Generate the network query and send it to the peer
   829  		req := &accountRequest{
   830  			peer:   idle,
   831  			id:     reqid,
   832  			cancel: cancel,
   833  			stale:  make(chan struct{}),
   834  			origin: task.Next,
   835  			limit:  task.Last,
   836  			task:   task,
   837  		}
   838  		req.timeout = time.AfterFunc(requestTimeout, func() {
   839  			log.Debug("Account range request timed out")
   840  			s.scheduleRevertAccountRequest(req)
   841  		})
   842  		s.accountReqs[reqid] = req
   843  		delete(s.accountIdlers, idle)
   844  
   845  		s.pend.Add(1)
   846  		go func(peer SyncPeer, root common.Hash) {
   847  			defer s.pend.Done()
   848  
   849  			// Attempt to send the remote request and revert if it fails
   850  			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil {
   851  				peer.Log().Debug("Failed to request account range", "err", err)
   852  				s.scheduleRevertAccountRequest(req)
   853  			}
   854  		}(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists
   855  
   856  		// Inject the request into the task to block further assignments
   857  		task.req = req
   858  	}
   859  }
   860  
   861  // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
   862  func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) {
   863  	s.lock.Lock()
   864  	defer s.lock.Unlock()
   865  
   866  	// If there are no idle peers, short circuit assignment
   867  	if len(s.bytecodeIdlers) == 0 {
   868  		return
   869  	}
   870  	// Iterate over all the tasks and try to find a pending one
   871  	for _, task := range s.tasks {
   872  		// Skip any tasks not in the bytecode retrieval phase
   873  		if task.res == nil {
   874  			continue
   875  		}
   876  		// Skip tasks that are already retrieving (or done with) all codes
   877  		if len(task.codeTasks) == 0 {
   878  			continue
   879  		}
   880  		// Task pending retrieval, try to find an idle peer. If no such peer
   881  		// exists, we probably assigned tasks for all (or they are stateless).
   882  		// Abort the entire assignment mechanism.
   883  		var idle string
   884  		for id := range s.bytecodeIdlers {
   885  			// If the peer rejected a query in this sync cycle, don't bother asking
   886  			// again for anything, it's either out of sync or already pruned
   887  			if _, ok := s.statelessPeers[id]; ok {
   888  				continue
   889  			}
   890  			idle = id
   891  			break
   892  		}
   893  		if idle == "" {
   894  			return
   895  		}
   896  		// Matched a pending task to an idle peer, allocate a unique request id
   897  		var reqid uint64
   898  		for {
   899  			reqid = uint64(rand.Int63())
   900  			if reqid == 0 {
   901  				continue
   902  			}
   903  			if _, ok := s.bytecodeReqs[reqid]; ok {
   904  				continue
   905  			}
   906  			break
   907  		}
   908  		// Generate the network query and send it to the peer
   909  		hashes := make([]common.Hash, 0, maxCodeRequestCount)
   910  		for hash := range task.codeTasks {
   911  			delete(task.codeTasks, hash)
   912  			hashes = append(hashes, hash)
   913  			if len(hashes) >= maxCodeRequestCount {
   914  				break
   915  			}
   916  		}
   917  		req := &bytecodeRequest{
   918  			peer:   idle,
   919  			id:     reqid,
   920  			cancel: cancel,
   921  			stale:  make(chan struct{}),
   922  			hashes: hashes,
   923  			task:   task,
   924  		}
   925  		req.timeout = time.AfterFunc(requestTimeout, func() {
   926  			log.Debug("Bytecode request timed out")
   927  			s.scheduleRevertBytecodeRequest(req)
   928  		})
   929  		s.bytecodeReqs[reqid] = req
   930  		delete(s.bytecodeIdlers, idle)
   931  
   932  		s.pend.Add(1)
   933  		go func(peer SyncPeer) {
   934  			defer s.pend.Done()
   935  
   936  			// Attempt to send the remote request and revert if it fails
   937  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
   938  				log.Debug("Failed to request bytecodes", "err", err)
   939  				s.scheduleRevertBytecodeRequest(req)
   940  			}
   941  		}(s.peers[idle]) // We're in the lock, peers[id] surely exists
   942  	}
   943  }
   944  
   945  // assignStorageTasks attempts to match idle peers to pending storage range
   946  // retrievals.
   947  func (s *Syncer) assignStorageTasks(cancel chan struct{}) {
   948  	s.lock.Lock()
   949  	defer s.lock.Unlock()
   950  
   951  	// If there are no idle peers, short circuit assignment
   952  	if len(s.storageIdlers) == 0 {
   953  		return
   954  	}
   955  	// Iterate over all the tasks and try to find a pending one
   956  	for _, task := range s.tasks {
   957  		// Skip any tasks not in the storage retrieval phase
   958  		if task.res == nil {
   959  			continue
   960  		}
   961  		// Skip tasks that are already retrieving (or done with) all small states
   962  		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
   963  			continue
   964  		}
   965  		// Task pending retrieval, try to find an idle peer. If no such peer
   966  		// exists, we probably assigned tasks for all (or they are stateless).
   967  		// Abort the entire assignment mechanism.
   968  		var idle string
   969  		for id := range s.storageIdlers {
   970  			// If the peer rejected a query in this sync cycle, don't bother asking
   971  			// again for anything, it's either out of sync or already pruned
   972  			if _, ok := s.statelessPeers[id]; ok {
   973  				continue
   974  			}
   975  			idle = id
   976  			break
   977  		}
   978  		if idle == "" {
   979  			return
   980  		}
   981  		// Matched a pending task to an idle peer, allocate a unique request id
   982  		var reqid uint64
   983  		for {
   984  			reqid = uint64(rand.Int63())
   985  			if reqid == 0 {
   986  				continue
   987  			}
   988  			if _, ok := s.storageReqs[reqid]; ok {
   989  				continue
   990  			}
   991  			break
   992  		}
   993  		// Generate the network query and send it to the peer. If there are
   994  		// large contract tasks pending, complete those before diving into
   995  		// even more new contracts.
   996  		var (
   997  			accounts = make([]common.Hash, 0, maxStorageSetRequestCount)
   998  			roots    = make([]common.Hash, 0, maxStorageSetRequestCount)
   999  			subtask  *storageTask
  1000  		)
  1001  		for account, subtasks := range task.SubTasks {
  1002  			for _, st := range subtasks {
  1003  				// Skip any subtasks already filling
  1004  				if st.req != nil {
  1005  					continue
  1006  				}
  1007  				// Found an incomplete storage chunk, schedule it
  1008  				accounts = append(accounts, account)
  1009  				roots = append(roots, st.root)
  1010  				subtask = st
  1011  				break // Large contract chunks are downloaded individually
  1012  			}
  1013  			if subtask != nil {
  1014  				break // Large contract chunks are downloaded individually
  1015  			}
  1016  		}
  1017  		if subtask == nil {
  1018  			// No large contract required retrieval, but small ones available
  1019  			for acccount, root := range task.stateTasks {
  1020  				delete(task.stateTasks, acccount)
  1021  
  1022  				accounts = append(accounts, acccount)
  1023  				roots = append(roots, root)
  1024  
  1025  				if len(accounts) >= maxStorageSetRequestCount {
  1026  					break
  1027  				}
  1028  			}
  1029  		}
  1030  		// If nothing was found, it means this task is actually already fully
  1031  		// retrieving, but large contracts are hard to detect. Skip to the next.
  1032  		if len(accounts) == 0 {
  1033  			continue
  1034  		}
  1035  		req := &storageRequest{
  1036  			peer:     idle,
  1037  			id:       reqid,
  1038  			cancel:   cancel,
  1039  			stale:    make(chan struct{}),
  1040  			accounts: accounts,
  1041  			roots:    roots,
  1042  			mainTask: task,
  1043  			subTask:  subtask,
  1044  		}
  1045  		if subtask != nil {
  1046  			req.origin = subtask.Next
  1047  			req.limit = subtask.Last
  1048  		}
  1049  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1050  			log.Debug("Storage request timed out")
  1051  			s.scheduleRevertStorageRequest(req)
  1052  		})
  1053  		s.storageReqs[reqid] = req
  1054  		delete(s.storageIdlers, idle)
  1055  
  1056  		s.pend.Add(1)
  1057  		go func(peer SyncPeer, root common.Hash) {
  1058  			defer s.pend.Done()
  1059  
  1060  			// Attempt to send the remote request and revert if it fails
  1061  			var origin, limit []byte
  1062  			if subtask != nil {
  1063  				origin, limit = req.origin[:], req.limit[:]
  1064  			}
  1065  			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil {
  1066  				log.Debug("Failed to request storage", "err", err)
  1067  				s.scheduleRevertStorageRequest(req)
  1068  			}
  1069  		}(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists
  1070  
  1071  		// Inject the request into the subtask to block further assignments
  1072  		if subtask != nil {
  1073  			subtask.req = req
  1074  		}
  1075  	}
  1076  }
  1077  
  1078  // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
  1079  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1080  func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) {
  1081  	s.lock.Lock()
  1082  	defer s.lock.Unlock()
  1083  
  1084  	// If there are no idle peers, short circuit assignment
  1085  	if len(s.trienodeHealIdlers) == 0 {
  1086  		return
  1087  	}
  1088  	// Iterate over pending tasks and try to find a peer to retrieve with
  1089  	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1090  		// If there are not enough trie tasks queued to fully assign, fill the
  1091  		// queue from the state sync scheduler. The trie synced schedules these
  1092  		// together with bytecodes, so we need to queue them combined.
  1093  		var (
  1094  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1095  			want = maxTrieRequestCount + maxCodeRequestCount
  1096  		)
  1097  		if have < want {
  1098  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1099  			for i, hash := range nodes {
  1100  				s.healer.trieTasks[hash] = paths[i]
  1101  			}
  1102  			for _, hash := range codes {
  1103  				s.healer.codeTasks[hash] = struct{}{}
  1104  			}
  1105  		}
  1106  		// If all the heal tasks are bytecodes or already downloading, bail
  1107  		if len(s.healer.trieTasks) == 0 {
  1108  			return
  1109  		}
  1110  		// Task pending retrieval, try to find an idle peer. If no such peer
  1111  		// exists, we probably assigned tasks for all (or they are stateless).
  1112  		// Abort the entire assignment mechanism.
  1113  		var idle string
  1114  		for id := range s.trienodeHealIdlers {
  1115  			// If the peer rejected a query in this sync cycle, don't bother asking
  1116  			// again for anything, it's either out of sync or already pruned
  1117  			if _, ok := s.statelessPeers[id]; ok {
  1118  				continue
  1119  			}
  1120  			idle = id
  1121  			break
  1122  		}
  1123  		if idle == "" {
  1124  			return
  1125  		}
  1126  		// Matched a pending task to an idle peer, allocate a unique request id
  1127  		var reqid uint64
  1128  		for {
  1129  			reqid = uint64(rand.Int63())
  1130  			if reqid == 0 {
  1131  				continue
  1132  			}
  1133  			if _, ok := s.trienodeHealReqs[reqid]; ok {
  1134  				continue
  1135  			}
  1136  			break
  1137  		}
  1138  		// Generate the network query and send it to the peer
  1139  		var (
  1140  			hashes   = make([]common.Hash, 0, maxTrieRequestCount)
  1141  			paths    = make([]trie.SyncPath, 0, maxTrieRequestCount)
  1142  			pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount)
  1143  		)
  1144  		for hash, pathset := range s.healer.trieTasks {
  1145  			delete(s.healer.trieTasks, hash)
  1146  
  1147  			hashes = append(hashes, hash)
  1148  			paths = append(paths, pathset)
  1149  			pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash
  1150  
  1151  			if len(hashes) >= maxTrieRequestCount {
  1152  				break
  1153  			}
  1154  		}
  1155  		req := &trienodeHealRequest{
  1156  			peer:   idle,
  1157  			id:     reqid,
  1158  			cancel: cancel,
  1159  			stale:  make(chan struct{}),
  1160  			hashes: hashes,
  1161  			paths:  paths,
  1162  			task:   s.healer,
  1163  		}
  1164  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1165  			log.Debug("Trienode heal request timed out")
  1166  			s.scheduleRevertTrienodeHealRequest(req)
  1167  		})
  1168  		s.trienodeHealReqs[reqid] = req
  1169  		delete(s.trienodeHealIdlers, idle)
  1170  
  1171  		s.pend.Add(1)
  1172  		go func(peer SyncPeer, root common.Hash) {
  1173  			defer s.pend.Done()
  1174  
  1175  			// Attempt to send the remote request and revert if it fails
  1176  			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
  1177  				log.Debug("Failed to request trienode healers", "err", err)
  1178  				s.scheduleRevertTrienodeHealRequest(req)
  1179  			}
  1180  		}(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists
  1181  	}
  1182  }
  1183  
  1184  // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
  1185  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1186  func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) {
  1187  	s.lock.Lock()
  1188  	defer s.lock.Unlock()
  1189  
  1190  	// If there are no idle peers, short circuit assignment
  1191  	if len(s.bytecodeHealIdlers) == 0 {
  1192  		return
  1193  	}
  1194  	// Iterate over pending tasks and try to find a peer to retrieve with
  1195  	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1196  		// If there are not enough trie tasks queued to fully assign, fill the
  1197  		// queue from the state sync scheduler. The trie synced schedules these
  1198  		// together with trie nodes, so we need to queue them combined.
  1199  		var (
  1200  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1201  			want = maxTrieRequestCount + maxCodeRequestCount
  1202  		)
  1203  		if have < want {
  1204  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1205  			for i, hash := range nodes {
  1206  				s.healer.trieTasks[hash] = paths[i]
  1207  			}
  1208  			for _, hash := range codes {
  1209  				s.healer.codeTasks[hash] = struct{}{}
  1210  			}
  1211  		}
  1212  		// If all the heal tasks are trienodes or already downloading, bail
  1213  		if len(s.healer.codeTasks) == 0 {
  1214  			return
  1215  		}
  1216  		// Task pending retrieval, try to find an idle peer. If no such peer
  1217  		// exists, we probably assigned tasks for all (or they are stateless).
  1218  		// Abort the entire assignment mechanism.
  1219  		var idle string
  1220  		for id := range s.bytecodeHealIdlers {
  1221  			// If the peer rejected a query in this sync cycle, don't bother asking
  1222  			// again for anything, it's either out of sync or already pruned
  1223  			if _, ok := s.statelessPeers[id]; ok {
  1224  				continue
  1225  			}
  1226  			idle = id
  1227  			break
  1228  		}
  1229  		if idle == "" {
  1230  			return
  1231  		}
  1232  		// Matched a pending task to an idle peer, allocate a unique request id
  1233  		var reqid uint64
  1234  		for {
  1235  			reqid = uint64(rand.Int63())
  1236  			if reqid == 0 {
  1237  				continue
  1238  			}
  1239  			if _, ok := s.bytecodeHealReqs[reqid]; ok {
  1240  				continue
  1241  			}
  1242  			break
  1243  		}
  1244  		// Generate the network query and send it to the peer
  1245  		hashes := make([]common.Hash, 0, maxCodeRequestCount)
  1246  		for hash := range s.healer.codeTasks {
  1247  			delete(s.healer.codeTasks, hash)
  1248  
  1249  			hashes = append(hashes, hash)
  1250  			if len(hashes) >= maxCodeRequestCount {
  1251  				break
  1252  			}
  1253  		}
  1254  		req := &bytecodeHealRequest{
  1255  			peer:   idle,
  1256  			id:     reqid,
  1257  			cancel: cancel,
  1258  			stale:  make(chan struct{}),
  1259  			hashes: hashes,
  1260  			task:   s.healer,
  1261  		}
  1262  		req.timeout = time.AfterFunc(requestTimeout, func() {
  1263  			log.Debug("Bytecode heal request timed out")
  1264  			s.scheduleRevertBytecodeHealRequest(req)
  1265  		})
  1266  		s.bytecodeHealReqs[reqid] = req
  1267  		delete(s.bytecodeHealIdlers, idle)
  1268  
  1269  		s.pend.Add(1)
  1270  		go func(peer SyncPeer) {
  1271  			defer s.pend.Done()
  1272  
  1273  			// Attempt to send the remote request and revert if it fails
  1274  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1275  				log.Debug("Failed to request bytecode healers", "err", err)
  1276  				s.scheduleRevertBytecodeHealRequest(req)
  1277  			}
  1278  		}(s.peers[idle]) // We're in the lock, peers[id] surely exists
  1279  	}
  1280  }
  1281  
  1282  // revertRequests locates all the currently pending reuqests from a particular
  1283  // peer and reverts them, rescheduling for others to fulfill.
  1284  func (s *Syncer) revertRequests(peer string) {
  1285  	// Gather the requests first, revertals need the lock too
  1286  	s.lock.Lock()
  1287  	var accountReqs []*accountRequest
  1288  	for _, req := range s.accountReqs {
  1289  		if req.peer == peer {
  1290  			accountReqs = append(accountReqs, req)
  1291  		}
  1292  	}
  1293  	var bytecodeReqs []*bytecodeRequest
  1294  	for _, req := range s.bytecodeReqs {
  1295  		if req.peer == peer {
  1296  			bytecodeReqs = append(bytecodeReqs, req)
  1297  		}
  1298  	}
  1299  	var storageReqs []*storageRequest
  1300  	for _, req := range s.storageReqs {
  1301  		if req.peer == peer {
  1302  			storageReqs = append(storageReqs, req)
  1303  		}
  1304  	}
  1305  	var trienodeHealReqs []*trienodeHealRequest
  1306  	for _, req := range s.trienodeHealReqs {
  1307  		if req.peer == peer {
  1308  			trienodeHealReqs = append(trienodeHealReqs, req)
  1309  		}
  1310  	}
  1311  	var bytecodeHealReqs []*bytecodeHealRequest
  1312  	for _, req := range s.bytecodeHealReqs {
  1313  		if req.peer == peer {
  1314  			bytecodeHealReqs = append(bytecodeHealReqs, req)
  1315  		}
  1316  	}
  1317  	s.lock.Unlock()
  1318  
  1319  	// Revert all the requests matching the peer
  1320  	for _, req := range accountReqs {
  1321  		s.revertAccountRequest(req)
  1322  	}
  1323  	for _, req := range bytecodeReqs {
  1324  		s.revertBytecodeRequest(req)
  1325  	}
  1326  	for _, req := range storageReqs {
  1327  		s.revertStorageRequest(req)
  1328  	}
  1329  	for _, req := range trienodeHealReqs {
  1330  		s.revertTrienodeHealRequest(req)
  1331  	}
  1332  	for _, req := range bytecodeHealReqs {
  1333  		s.revertBytecodeHealRequest(req)
  1334  	}
  1335  }
  1336  
  1337  // scheduleRevertAccountRequest asks the event loop to clean up an account range
  1338  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1339  func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
  1340  	select {
  1341  	case s.accountReqFails <- req:
  1342  		// Sync event loop notified
  1343  	case <-req.cancel:
  1344  		// Sync cycle got cancelled
  1345  	case <-req.stale:
  1346  		// Request already reverted
  1347  	}
  1348  }
  1349  
  1350  // revertAccountRequest cleans up an account range request and returns all failed
  1351  // retrieval tasks to the scheduler for reassignment.
  1352  //
  1353  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1354  // On peer threads, use scheduleRevertAccountRequest.
  1355  func (s *Syncer) revertAccountRequest(req *accountRequest) {
  1356  	log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
  1357  	select {
  1358  	case <-req.stale:
  1359  		log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
  1360  		return
  1361  	default:
  1362  	}
  1363  	close(req.stale)
  1364  
  1365  	// Remove the request from the tracked set
  1366  	s.lock.Lock()
  1367  	delete(s.accountReqs, req.id)
  1368  	s.lock.Unlock()
  1369  
  1370  	// If there's a timeout timer still running, abort it and mark the account
  1371  	// task as not-pending, ready for resheduling
  1372  	req.timeout.Stop()
  1373  	if req.task.req == req {
  1374  		req.task.req = nil
  1375  	}
  1376  }
  1377  
  1378  // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
  1379  // and return all failed retrieval tasks to the scheduler for reassignment.
  1380  func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
  1381  	select {
  1382  	case s.bytecodeReqFails <- req:
  1383  		// Sync event loop notified
  1384  	case <-req.cancel:
  1385  		// Sync cycle got cancelled
  1386  	case <-req.stale:
  1387  		// Request already reverted
  1388  	}
  1389  }
  1390  
  1391  // revertBytecodeRequest cleans up a bytecode request and returns all failed
  1392  // retrieval tasks to the scheduler for reassignment.
  1393  //
  1394  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1395  // On peer threads, use scheduleRevertBytecodeRequest.
  1396  func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
  1397  	log.Debug("Reverting bytecode request", "peer", req.peer)
  1398  	select {
  1399  	case <-req.stale:
  1400  		log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id)
  1401  		return
  1402  	default:
  1403  	}
  1404  	close(req.stale)
  1405  
  1406  	// Remove the request from the tracked set
  1407  	s.lock.Lock()
  1408  	delete(s.bytecodeReqs, req.id)
  1409  	s.lock.Unlock()
  1410  
  1411  	// If there's a timeout timer still running, abort it and mark the code
  1412  	// retrievals as not-pending, ready for resheduling
  1413  	req.timeout.Stop()
  1414  	for _, hash := range req.hashes {
  1415  		req.task.codeTasks[hash] = struct{}{}
  1416  	}
  1417  }
  1418  
  1419  // scheduleRevertStorageRequest asks the event loop to clean up a storage range
  1420  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1421  func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
  1422  	select {
  1423  	case s.storageReqFails <- req:
  1424  		// Sync event loop notified
  1425  	case <-req.cancel:
  1426  		// Sync cycle got cancelled
  1427  	case <-req.stale:
  1428  		// Request already reverted
  1429  	}
  1430  }
  1431  
  1432  // revertStorageRequest cleans up a storage range request and returns all failed
  1433  // retrieval tasks to the scheduler for reassignment.
  1434  //
  1435  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1436  // On peer threads, use scheduleRevertStorageRequest.
  1437  func (s *Syncer) revertStorageRequest(req *storageRequest) {
  1438  	log.Debug("Reverting storage request", "peer", req.peer)
  1439  	select {
  1440  	case <-req.stale:
  1441  		log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id)
  1442  		return
  1443  	default:
  1444  	}
  1445  	close(req.stale)
  1446  
  1447  	// Remove the request from the tracked set
  1448  	s.lock.Lock()
  1449  	delete(s.storageReqs, req.id)
  1450  	s.lock.Unlock()
  1451  
  1452  	// If there's a timeout timer still running, abort it and mark the storage
  1453  	// task as not-pending, ready for resheduling
  1454  	req.timeout.Stop()
  1455  	if req.subTask != nil {
  1456  		req.subTask.req = nil
  1457  	} else {
  1458  		for i, account := range req.accounts {
  1459  			req.mainTask.stateTasks[account] = req.roots[i]
  1460  		}
  1461  	}
  1462  }
  1463  
  1464  // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal
  1465  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1466  func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
  1467  	select {
  1468  	case s.trienodeHealReqFails <- req:
  1469  		// Sync event loop notified
  1470  	case <-req.cancel:
  1471  		// Sync cycle got cancelled
  1472  	case <-req.stale:
  1473  		// Request already reverted
  1474  	}
  1475  }
  1476  
  1477  // revertTrienodeHealRequest cleans up a trienode heal request and returns all
  1478  // failed retrieval tasks to the scheduler for reassignment.
  1479  //
  1480  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1481  // On peer threads, use scheduleRevertTrienodeHealRequest.
  1482  func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
  1483  	log.Debug("Reverting trienode heal request", "peer", req.peer)
  1484  	select {
  1485  	case <-req.stale:
  1486  		log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1487  		return
  1488  	default:
  1489  	}
  1490  	close(req.stale)
  1491  
  1492  	// Remove the request from the tracked set
  1493  	s.lock.Lock()
  1494  	delete(s.trienodeHealReqs, req.id)
  1495  	s.lock.Unlock()
  1496  
  1497  	// If there's a timeout timer still running, abort it and mark the trie node
  1498  	// retrievals as not-pending, ready for resheduling
  1499  	req.timeout.Stop()
  1500  	for i, hash := range req.hashes {
  1501  		req.task.trieTasks[hash] = req.paths[i]
  1502  	}
  1503  }
  1504  
  1505  // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
  1506  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1507  func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
  1508  	select {
  1509  	case s.bytecodeHealReqFails <- req:
  1510  		// Sync event loop notified
  1511  	case <-req.cancel:
  1512  		// Sync cycle got cancelled
  1513  	case <-req.stale:
  1514  		// Request already reverted
  1515  	}
  1516  }
  1517  
  1518  // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
  1519  // failed retrieval tasks to the scheduler for reassignment.
  1520  //
  1521  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1522  // On peer threads, use scheduleRevertBytecodeHealRequest.
  1523  func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
  1524  	log.Debug("Reverting bytecode heal request", "peer", req.peer)
  1525  	select {
  1526  	case <-req.stale:
  1527  		log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1528  		return
  1529  	default:
  1530  	}
  1531  	close(req.stale)
  1532  
  1533  	// Remove the request from the tracked set
  1534  	s.lock.Lock()
  1535  	delete(s.bytecodeHealReqs, req.id)
  1536  	s.lock.Unlock()
  1537  
  1538  	// If there's a timeout timer still running, abort it and mark the code
  1539  	// retrievals as not-pending, ready for resheduling
  1540  	req.timeout.Stop()
  1541  	for _, hash := range req.hashes {
  1542  		req.task.codeTasks[hash] = struct{}{}
  1543  	}
  1544  }
  1545  
  1546  // processAccountResponse integrates an already validated account range response
  1547  // into the account tasks.
  1548  func (s *Syncer) processAccountResponse(res *accountResponse) {
  1549  	// Switch the task from pending to filling
  1550  	res.task.req = nil
  1551  	res.task.res = res
  1552  
  1553  	// Ensure that the response doesn't overflow into the subsequent task
  1554  	last := res.task.Last.Big()
  1555  	for i, hash := range res.hashes {
  1556  		if hash.Big().Cmp(last) > 0 {
  1557  			// Chunk overflown, cut off excess, but also update the boundary nodes
  1558  			for j := i; j < len(res.hashes); j++ {
  1559  				if err := res.trie.Prove(res.hashes[j][:], 0, res.overflow); err != nil {
  1560  					panic(err) // Account range was already proven, what happened
  1561  				}
  1562  			}
  1563  			res.hashes = res.hashes[:i]
  1564  			res.accounts = res.accounts[:i]
  1565  			res.cont = false // Mark range completed
  1566  			break
  1567  		}
  1568  	}
  1569  	// Iterate over all the accounts and assemble which ones need further sub-
  1570  	// filling before the entire account range can be persisted.
  1571  	res.task.needCode = make([]bool, len(res.accounts))
  1572  	res.task.needState = make([]bool, len(res.accounts))
  1573  	res.task.needHeal = make([]bool, len(res.accounts))
  1574  
  1575  	res.task.codeTasks = make(map[common.Hash]struct{})
  1576  	res.task.stateTasks = make(map[common.Hash]common.Hash)
  1577  
  1578  	resumed := make(map[common.Hash]struct{})
  1579  
  1580  	res.task.pend = 0
  1581  	for i, account := range res.accounts {
  1582  		// Check if the account is a contract with an unknown code
  1583  		if !bytes.Equal(account.CodeHash, emptyCode[:]) {
  1584  			if code := rawdb.ReadCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)); code == nil {
  1585  				res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{}
  1586  				res.task.needCode[i] = true
  1587  				res.task.pend++
  1588  			}
  1589  		}
  1590  		// Check if the account is a contract with an unknown storage trie
  1591  		if account.Root != emptyRoot {
  1592  			if node, err := s.db.Get(account.Root[:]); err != nil || node == nil {
  1593  				// If there was a previous large state retrieval in progress,
  1594  				// don't restart it from scratch. This happens if a sync cycle
  1595  				// is interrupted and resumed later. However, *do* update the
  1596  				// previous root hash.
  1597  				if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
  1598  					log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root)
  1599  					for _, subtask := range subtasks {
  1600  						subtask.root = account.Root
  1601  					}
  1602  					res.task.needHeal[i] = true
  1603  					resumed[res.hashes[i]] = struct{}{}
  1604  				} else {
  1605  					res.task.stateTasks[res.hashes[i]] = account.Root
  1606  				}
  1607  				res.task.needState[i] = true
  1608  				res.task.pend++
  1609  			}
  1610  		}
  1611  	}
  1612  	// Delete any subtasks that have been aborted but not resumed. This may undo
  1613  	// some progress if a new peer gives us less accounts than an old one, but for
  1614  	// now we have to live with that.
  1615  	for hash := range res.task.SubTasks {
  1616  		if _, ok := resumed[hash]; !ok {
  1617  			log.Debug("Aborting suspended storage retrieval", "account", hash)
  1618  			delete(res.task.SubTasks, hash)
  1619  		}
  1620  	}
  1621  	// If the account range contained no contracts, or all have been fully filled
  1622  	// beforehand, short circuit storage filling and forward to the next task
  1623  	if res.task.pend == 0 {
  1624  		s.forwardAccountTask(res.task)
  1625  		return
  1626  	}
  1627  	// Some accounts are incomplete, leave as is for the storage and contract
  1628  	// task assigners to pick up and fill.
  1629  }
  1630  
  1631  // processBytecodeResponse integrates an already validated bytecode response
  1632  // into the account tasks.
  1633  func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
  1634  	batch := s.db.NewBatch()
  1635  
  1636  	var (
  1637  		codes uint64
  1638  		bytes common.StorageSize
  1639  	)
  1640  	for i, hash := range res.hashes {
  1641  		code := res.codes[i]
  1642  
  1643  		// If the bytecode was not delivered, reschedule it
  1644  		if code == nil {
  1645  			res.task.codeTasks[hash] = struct{}{}
  1646  			continue
  1647  		}
  1648  		// Code was delivered, mark it not needed any more
  1649  		for j, account := range res.task.res.accounts {
  1650  			if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) {
  1651  				res.task.needCode[j] = false
  1652  				res.task.pend--
  1653  			}
  1654  		}
  1655  		// Push the bytecode into a database batch
  1656  		s.bytecodeSynced++
  1657  		s.bytecodeBytes += common.StorageSize(len(code))
  1658  
  1659  		codes++
  1660  		bytes += common.StorageSize(len(code))
  1661  
  1662  		rawdb.WriteCode(batch, hash, code)
  1663  		s.bloom.Add(hash[:])
  1664  	}
  1665  	if err := batch.Write(); err != nil {
  1666  		log.Crit("Failed to persist bytecodes", "err", err)
  1667  	}
  1668  	log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
  1669  
  1670  	// If this delivery completed the last pending task, forward the account task
  1671  	// to the next chunk
  1672  	if res.task.pend == 0 {
  1673  		s.forwardAccountTask(res.task)
  1674  		return
  1675  	}
  1676  	// Some accounts are still incomplete, leave as is for the storage and contract
  1677  	// task assigners to pick up and fill.
  1678  }
  1679  
// processStorageResponse integrates an already validated storage response
// into the account tasks.
//
// For every requested account the method either reschedules it (nothing was
// delivered for it) or updates the main task's bookkeeping: completed storage
// tries clear needState, a chunked last contract is flagged needHeal and gets
// split into storageConcurrency subtasks. Responses belonging to a subtask are
// trimmed to the subtask's range and move the subtask forward. The delivered
// trie nodes are then written to the database, except for the boundary nodes
// of the last delivered account. If nothing remains pending on the main task
// afterwards, the task is forwarded.
func (s *Syncer) processStorageResponse(res *storageResponse) {
	// Switch the subtask from pending to idle
	if res.subTask != nil {
		res.subTask.req = nil
	}
	batch := s.db.NewBatch()

	// Statistics gathered for the counters and the debug log below
	var (
		slots   int
		nodes   int
		skipped int
		bytes   common.StorageSize
	)
	// Iterate over all the accounts and reconstruct their storage tries from the
	// delivered slots
	for i, account := range res.accounts {
		// If the account was not delivered, reschedule it
		if i >= len(res.hashes) {
			res.mainTask.stateTasks[account] = res.roots[i]
			continue
		}
		// State was delivered, if complete mark as not needed any more, otherwise
		// mark the account as needing healing
		for j, hash := range res.mainTask.res.hashes {
			// Only the entry of the account range that matches this account
			// is of interest
			if account != hash {
				continue
			}
			acc := res.mainTask.res.accounts[j]

			// If the packet contains multiple contract storage slots, all
			// but the last are surely complete. The last contract may be
			// chunked, so check its continuation flag.
			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
				res.mainTask.needState[j] = false
				res.mainTask.pend--
			}
			// If the last contract was chunked, mark it as needing healing
			// to avoid writing it out to disk prematurely.
			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
				res.mainTask.needHeal[j] = true
			}
			// If the last contract was chunked, we need to switch to large
			// contract handling mode
			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
				// If we haven't yet started a large-contract retrieval, create
				// the subtasks for it within the main account task
				if tasks, ok := res.mainTask.SubTasks[account]; !ok {
					var (
						next common.Hash
					)
					// Width of one subtask range: 2^256 / storageConcurrency - 1
					step := new(big.Int).Sub(
						new(big.Int).Div(
							new(big.Int).Exp(common.Big2, common.Big256, nil),
							big.NewInt(storageConcurrency),
						), common.Big1,
					)
					for k := 0; k < storageConcurrency; k++ {
						last := common.BigToHash(new(big.Int).Add(next.Big(), step))
						if k == storageConcurrency-1 {
							// Make sure we don't overflow if the step is not a proper divisor
							last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
						}
						tasks = append(tasks, &storageTask{
							Next: next,
							Last: last,
							root: acc.Root,
						})
						log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last)
						// The following subtask starts right after this one ends
						next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
					}
					res.mainTask.SubTasks[account] = tasks

					// Since we've just created the sub-tasks, this response
					// is surely for the first one (zero origin)
					res.subTask = tasks[0]
				}
			}
			// If we're in large contract delivery mode, forward the subtask
			if res.subTask != nil {
				// Ensure the response doesn't overflow into the subsequent task
				last := res.subTask.Last.Big()
				for k, hash := range res.hashes[i] {
					if hash.Big().Cmp(last) > 0 {
						// Chunk overflown, cut off excess, but also update the boundary
						for l := k; l < len(res.hashes[i]); l++ {
							if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil {
								panic(err) // Storage range was already proven, what happened
							}
						}
						res.hashes[i] = res.hashes[i][:k]
						res.slots[i] = res.slots[i][:k]
						res.cont = false // Mark range completed
						break
					}
				}
				// Forward the relevant storage chunk (even if created just now)
				if res.cont {
					// More slots follow, continue right after the last delivered hash
					res.subTask.Next = common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1)))
				} else {
					res.subTask.done = true
				}
			}
		}
		// Iterate over all the reconstructed trie nodes and push them to disk
		slots += len(res.hashes[i])

		it := res.nodes[i].NewIterator(nil, nil)
		for it.Next() {
			// Boundary nodes are not written for the last result, since they are incomplete
			if i == len(res.hashes)-1 {
				if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok {
					skipped++
					continue
				}
			}
			// Node is not a boundary, persist to disk
			batch.Put(it.Key(), it.Value())
			s.bloom.Add(it.Key())

			bytes += common.StorageSize(common.HashLength + len(it.Value()))
			nodes++
		}
		it.Release()
	}
	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist storage slots", "err", err)
	}
	s.storageSynced += uint64(slots)
	s.storageBytes += bytes

	log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "nodes", nodes, "skipped", skipped, "bytes", bytes)

	// If this delivery completed the last pending task, forward the account task
	// to the next chunk
	if res.mainTask.pend == 0 {
		s.forwardAccountTask(res.mainTask)
		return
	}
	// Some accounts are still incomplete, leave as is for the storage and contract
	// task assigners to pick up and fill.
}
  1823  
  1824  // processTrienodeHealResponse integrates an already validated trienode response
  1825  // into the healer tasks.
  1826  func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
  1827  	for i, hash := range res.hashes {
  1828  		node := res.nodes[i]
  1829  
  1830  		// If the trie node was not delivered, reschedule it
  1831  		if node == nil {
  1832  			res.task.trieTasks[hash] = res.paths[i]
  1833  			continue
  1834  		}
  1835  		// Push the trie node into the state syncer
  1836  		s.trienodeHealSynced++
  1837  		s.trienodeHealBytes += common.StorageSize(len(node))
  1838  
  1839  		err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node})
  1840  		switch err {
  1841  		case nil:
  1842  		case trie.ErrAlreadyProcessed:
  1843  			s.trienodeHealDups++
  1844  		case trie.ErrNotRequested:
  1845  			s.trienodeHealNops++
  1846  		default:
  1847  			log.Error("Invalid trienode processed", "hash", hash, "err", err)
  1848  		}
  1849  	}
  1850  	batch := s.db.NewBatch()
  1851  	if err := s.healer.scheduler.Commit(batch); err != nil {
  1852  		log.Error("Failed to commit healing data", "err", err)
  1853  	}
  1854  	if err := batch.Write(); err != nil {
  1855  		log.Crit("Failed to persist healing data", "err", err)
  1856  	}
  1857  	log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
  1858  }
  1859  
  1860  // processBytecodeHealResponse integrates an already validated bytecode response
  1861  // into the healer tasks.
  1862  func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
  1863  	for i, hash := range res.hashes {
  1864  		node := res.codes[i]
  1865  
  1866  		// If the trie node was not delivered, reschedule it
  1867  		if node == nil {
  1868  			res.task.codeTasks[hash] = struct{}{}
  1869  			continue
  1870  		}
  1871  		// Push the trie node into the state syncer
  1872  		s.bytecodeHealSynced++
  1873  		s.bytecodeHealBytes += common.StorageSize(len(node))
  1874  
  1875  		err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node})
  1876  		switch err {
  1877  		case nil:
  1878  		case trie.ErrAlreadyProcessed:
  1879  			s.bytecodeHealDups++
  1880  		case trie.ErrNotRequested:
  1881  			s.bytecodeHealNops++
  1882  		default:
  1883  			log.Error("Invalid bytecode processed", "hash", hash, "err", err)
  1884  		}
  1885  	}
  1886  	batch := s.db.NewBatch()
  1887  	if err := s.healer.scheduler.Commit(batch); err != nil {
  1888  		log.Error("Failed to commit healing data", "err", err)
  1889  	}
  1890  	if err := batch.Write(); err != nil {
  1891  		log.Crit("Failed to persist healing data", "err", err)
  1892  	}
  1893  	log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
  1894  }
  1895  
  1896  // forwardAccountTask takes a filled account task and persists anything available
  1897  // into the database, after which it forwards the next account marker so that the
  1898  // task's next chunk may be filled.
  1899  func (s *Syncer) forwardAccountTask(task *accountTask) {
  1900  	// Remove any pending delivery
  1901  	res := task.res
  1902  	if res == nil {
  1903  		return // nothing to forward
  1904  	}
  1905  	task.res = nil
  1906  
  1907  	// Iterate over all the accounts and gather all the incomplete trie nodes. A
  1908  	// node is incomplete if we haven't yet filled it (sync was interrupted), or
  1909  	// if we filled it in multiple chunks (storage trie), in which case the few
  1910  	// nodes on the chunk boundaries are missing.
  1911  	incompletes := light.NewNodeSet()
  1912  	for i := range res.accounts {
  1913  		// If the filling was interrupted, mark everything after as incomplete
  1914  		if task.needCode[i] || task.needState[i] {
  1915  			for j := i; j < len(res.accounts); j++ {
  1916  				if err := res.trie.Prove(res.hashes[j][:], 0, incompletes); err != nil {
  1917  					panic(err) // Account range was already proven, what happened
  1918  				}
  1919  			}
  1920  			break
  1921  		}
  1922  		// Filling not interrupted until this point, mark incomplete if needs healing
  1923  		if task.needHeal[i] {
  1924  			if err := res.trie.Prove(res.hashes[i][:], 0, incompletes); err != nil {
  1925  				panic(err) // Account range was already proven, what happened
  1926  			}
  1927  		}
  1928  	}
  1929  	// Persist every finalized trie node that's not on the boundary
  1930  	batch := s.db.NewBatch()
  1931  
  1932  	var (
  1933  		nodes   int
  1934  		skipped int
  1935  		bytes   common.StorageSize
  1936  	)
  1937  	it := res.nodes.NewIterator(nil, nil)
  1938  	for it.Next() {
  1939  		// Boundary nodes are not written, since they are incomplete
  1940  		if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok {
  1941  			skipped++
  1942  			continue
  1943  		}
  1944  		// Overflow nodes are not written, since they mess with another task
  1945  		if _, err := res.overflow.Get(it.Key()); err == nil {
  1946  			skipped++
  1947  			continue
  1948  		}
  1949  		// Accounts with split storage requests are incomplete
  1950  		if _, err := incompletes.Get(it.Key()); err == nil {
  1951  			skipped++
  1952  			continue
  1953  		}
  1954  		// Node is neither a boundary, not an incomplete account, persist to disk
  1955  		batch.Put(it.Key(), it.Value())
  1956  		s.bloom.Add(it.Key())
  1957  
  1958  		bytes += common.StorageSize(common.HashLength + len(it.Value()))
  1959  		nodes++
  1960  	}
  1961  	it.Release()
  1962  
  1963  	if err := batch.Write(); err != nil {
  1964  		log.Crit("Failed to persist accounts", "err", err)
  1965  	}
  1966  	s.accountBytes += bytes
  1967  	s.accountSynced += uint64(len(res.accounts))
  1968  
  1969  	log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "nodes", nodes, "skipped", skipped, "bytes", bytes)
  1970  
  1971  	// Task filling persisted, push it the chunk marker forward to the first
  1972  	// account still missing data.
  1973  	for i, hash := range res.hashes {
  1974  		if task.needCode[i] || task.needState[i] {
  1975  			return
  1976  		}
  1977  		task.Next = common.BigToHash(new(big.Int).Add(hash.Big(), big.NewInt(1)))
  1978  	}
  1979  	// All accounts marked as complete, track if the entire task is done
  1980  	task.done = !res.cont
  1981  }
  1982  
  1983  // OnAccounts is a callback method to invoke when a range of accounts are
  1984  // received from a remote peer.
  1985  func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
  1986  	size := common.StorageSize(len(hashes) * common.HashLength)
  1987  	for _, account := range accounts {
  1988  		size += common.StorageSize(len(account))
  1989  	}
  1990  	for _, node := range proof {
  1991  		size += common.StorageSize(len(node))
  1992  	}
  1993  	logger := peer.Log().New("reqid", id)
  1994  	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)
  1995  
  1996  	// Whether or not the response is valid, we can mark the peer as idle and
  1997  	// notify the scheduler to assign a new task. If the response is invalid,
  1998  	// we'll drop the peer in a bit.
  1999  	s.lock.Lock()
  2000  	if _, ok := s.peers[peer.ID()]; ok {
  2001  		s.accountIdlers[peer.ID()] = struct{}{}
  2002  	}
  2003  	select {
  2004  	case s.update <- struct{}{}:
  2005  	default:
  2006  	}
  2007  	// Ensure the response is for a valid request
  2008  	req, ok := s.accountReqs[id]
  2009  	if !ok {
  2010  		// Request stale, perhaps the peer timed out but came through in the end
  2011  		logger.Warn("Unexpected account range packet")
  2012  		s.lock.Unlock()
  2013  		return nil
  2014  	}
  2015  	delete(s.accountReqs, id)
  2016  
  2017  	// Clean up the request timeout timer, we'll see how to proceed further based
  2018  	// on the actual delivered content
  2019  	if !req.timeout.Stop() {
  2020  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2021  		s.lock.Unlock()
  2022  		return nil
  2023  	}
  2024  
  2025  	// Response is valid, but check if peer is signalling that it does not have
  2026  	// the requested data. For account range queries that means the state being
  2027  	// retrieved was either already pruned remotely, or the peer is not yet
  2028  	// synced to our head.
  2029  	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
  2030  		logger.Debug("Peer rejected account range request", "root", s.root)
  2031  		s.statelessPeers[peer.ID()] = struct{}{}
  2032  		s.lock.Unlock()
  2033  
  2034  		// Signal this request as failed, and ready for rescheduling
  2035  		s.scheduleRevertAccountRequest(req)
  2036  		return nil
  2037  	}
  2038  	root := s.root
  2039  	s.lock.Unlock()
  2040  
  2041  	// Reconstruct a partial trie from the response and verify it
  2042  	keys := make([][]byte, len(hashes))
  2043  	for i, key := range hashes {
  2044  		keys[i] = common.CopyBytes(key[:])
  2045  	}
  2046  	nodes := make(light.NodeList, len(proof))
  2047  	for i, node := range proof {
  2048  		nodes[i] = node
  2049  	}
  2050  	proofdb := nodes.NodeSet()
  2051  
  2052  	var end []byte
  2053  	if len(keys) > 0 {
  2054  		end = keys[len(keys)-1]
  2055  	}
  2056  	db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
  2057  	if err != nil {
  2058  		logger.Warn("Account range failed proof", "err", err)
  2059  		// Signal this request as failed, and ready for rescheduling
  2060  		s.scheduleRevertAccountRequest(req)
  2061  		return err
  2062  	}
  2063  	// Partial trie reconstructed, send it to the scheduler for storage filling
  2064  	bounds := make(map[common.Hash]struct{})
  2065  
  2066  	it := notary.Accessed().NewIterator(nil, nil)
  2067  	for it.Next() {
  2068  		bounds[common.BytesToHash(it.Key())] = struct{}{}
  2069  	}
  2070  	it.Release()
  2071  
  2072  	accs := make([]*state.Account, len(accounts))
  2073  	for i, account := range accounts {
  2074  		acc := new(state.Account)
  2075  		if err := rlp.DecodeBytes(account, acc); err != nil {
  2076  			panic(err) // We created these blobs, we must be able to decode them
  2077  		}
  2078  		accs[i] = acc
  2079  	}
  2080  	response := &accountResponse{
  2081  		task:     req.task,
  2082  		hashes:   hashes,
  2083  		accounts: accs,
  2084  		nodes:    db,
  2085  		trie:     tr,
  2086  		bounds:   bounds,
  2087  		overflow: light.NewNodeSet(),
  2088  		cont:     cont,
  2089  	}
  2090  	select {
  2091  	case s.accountResps <- response:
  2092  	case <-req.cancel:
  2093  	case <-req.stale:
  2094  	}
  2095  	return nil
  2096  }
  2097  
  2098  // OnByteCodes is a callback method to invoke when a batch of contract
  2099  // bytes codes are received from a remote peer.
  2100  func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2101  	s.lock.RLock()
  2102  	syncing := !s.snapped
  2103  	s.lock.RUnlock()
  2104  
  2105  	if syncing {
  2106  		return s.onByteCodes(peer, id, bytecodes)
  2107  	}
  2108  	return s.onHealByteCodes(peer, id, bytecodes)
  2109  }
  2110  
  2111  // onByteCodes is a callback method to invoke when a batch of contract
  2112  // bytes codes are received from a remote peer in the syncing phase.
  2113  func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2114  	var size common.StorageSize
  2115  	for _, code := range bytecodes {
  2116  		size += common.StorageSize(len(code))
  2117  	}
  2118  	logger := peer.Log().New("reqid", id)
  2119  	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2120  
  2121  	// Whether or not the response is valid, we can mark the peer as idle and
  2122  	// notify the scheduler to assign a new task. If the response is invalid,
  2123  	// we'll drop the peer in a bit.
  2124  	s.lock.Lock()
  2125  	if _, ok := s.peers[peer.ID()]; ok {
  2126  		s.bytecodeIdlers[peer.ID()] = struct{}{}
  2127  	}
  2128  	select {
  2129  	case s.update <- struct{}{}:
  2130  	default:
  2131  	}
  2132  	// Ensure the response is for a valid request
  2133  	req, ok := s.bytecodeReqs[id]
  2134  	if !ok {
  2135  		// Request stale, perhaps the peer timed out but came through in the end
  2136  		logger.Warn("Unexpected bytecode packet")
  2137  		s.lock.Unlock()
  2138  		return nil
  2139  	}
  2140  	delete(s.bytecodeReqs, id)
  2141  
  2142  	// Clean up the request timeout timer, we'll see how to proceed further based
  2143  	// on the actual delivered content
  2144  	if !req.timeout.Stop() {
  2145  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2146  		s.lock.Unlock()
  2147  		return nil
  2148  	}
  2149  
  2150  	// Response is valid, but check if peer is signalling that it does not have
  2151  	// the requested data. For bytecode range queries that means the peer is not
  2152  	// yet synced.
  2153  	if len(bytecodes) == 0 {
  2154  		logger.Debug("Peer rejected bytecode request")
  2155  		s.statelessPeers[peer.ID()] = struct{}{}
  2156  		s.lock.Unlock()
  2157  
  2158  		// Signal this request as failed, and ready for rescheduling
  2159  		s.scheduleRevertBytecodeRequest(req)
  2160  		return nil
  2161  	}
  2162  	s.lock.Unlock()
  2163  
  2164  	// Cross reference the requested bytecodes with the response to find gaps
  2165  	// that the serving node is missing
  2166  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2167  	hash := make([]byte, 32)
  2168  
  2169  	codes := make([][]byte, len(req.hashes))
  2170  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2171  		// Find the next hash that we've been served, leaving misses with nils
  2172  		hasher.Reset()
  2173  		hasher.Write(bytecodes[i])
  2174  		hasher.Read(hash)
  2175  
  2176  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2177  			j++
  2178  		}
  2179  		if j < len(req.hashes) {
  2180  			codes[j] = bytecodes[i]
  2181  			j++
  2182  			continue
  2183  		}
  2184  		// We've either ran out of hashes, or got unrequested data
  2185  		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
  2186  		// Signal this request as failed, and ready for rescheduling
  2187  		s.scheduleRevertBytecodeRequest(req)
  2188  		return errors.New("unexpected bytecode")
  2189  	}
  2190  	// Response validated, send it to the scheduler for filling
  2191  	response := &bytecodeResponse{
  2192  		task:   req.task,
  2193  		hashes: req.hashes,
  2194  		codes:  codes,
  2195  	}
  2196  	select {
  2197  	case s.bytecodeResps <- response:
  2198  	case <-req.cancel:
  2199  	case <-req.stale:
  2200  	}
  2201  	return nil
  2202  }
  2203  
  2204  // OnStorage is a callback method to invoke when ranges of storage slots
  2205  // are received from a remote peer.
  2206  func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
  2207  	// Gather some trace stats to aid in debugging issues
  2208  	var (
  2209  		hashCount int
  2210  		slotCount int
  2211  		size      common.StorageSize
  2212  	)
  2213  	for _, hashset := range hashes {
  2214  		size += common.StorageSize(common.HashLength * len(hashset))
  2215  		hashCount += len(hashset)
  2216  	}
  2217  	for _, slotset := range slots {
  2218  		for _, slot := range slotset {
  2219  			size += common.StorageSize(len(slot))
  2220  		}
  2221  		slotCount += len(slotset)
  2222  	}
  2223  	for _, node := range proof {
  2224  		size += common.StorageSize(len(node))
  2225  	}
  2226  	logger := peer.Log().New("reqid", id)
  2227  	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)
  2228  
  2229  	// Whether or not the response is valid, we can mark the peer as idle and
  2230  	// notify the scheduler to assign a new task. If the response is invalid,
  2231  	// we'll drop the peer in a bit.
  2232  	s.lock.Lock()
  2233  	if _, ok := s.peers[peer.ID()]; ok {
  2234  		s.storageIdlers[peer.ID()] = struct{}{}
  2235  	}
  2236  	select {
  2237  	case s.update <- struct{}{}:
  2238  	default:
  2239  	}
  2240  	// Ensure the response is for a valid request
  2241  	req, ok := s.storageReqs[id]
  2242  	if !ok {
  2243  		// Request stale, perhaps the peer timed out but came through in the end
  2244  		logger.Warn("Unexpected storage ranges packet")
  2245  		s.lock.Unlock()
  2246  		return nil
  2247  	}
  2248  	delete(s.storageReqs, id)
  2249  
  2250  	// Clean up the request timeout timer, we'll see how to proceed further based
  2251  	// on the actual delivered content
  2252  	if !req.timeout.Stop() {
  2253  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2254  		s.lock.Unlock()
  2255  		return nil
  2256  	}
  2257  
  2258  	// Reject the response if the hash sets and slot sets don't match, or if the
  2259  	// peer sent more data than requested.
  2260  	if len(hashes) != len(slots) {
  2261  		s.lock.Unlock()
  2262  		s.scheduleRevertStorageRequest(req) // reschedule request
  2263  		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
  2264  		return errors.New("hash and slot set size mismatch")
  2265  	}
  2266  	if len(hashes) > len(req.accounts) {
  2267  		s.lock.Unlock()
  2268  		s.scheduleRevertStorageRequest(req) // reschedule request
  2269  		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
  2270  		return errors.New("hash set larger than requested")
  2271  	}
  2272  	// Response is valid, but check if peer is signalling that it does not have
  2273  	// the requested data. For storage range queries that means the state being
  2274  	// retrieved was either already pruned remotely, or the peer is not yet
  2275  	// synced to our head.
  2276  	if len(hashes) == 0 {
  2277  		logger.Debug("Peer rejected storage request")
  2278  		s.statelessPeers[peer.ID()] = struct{}{}
  2279  		s.lock.Unlock()
  2280  		s.scheduleRevertStorageRequest(req) // reschedule request
  2281  		return nil
  2282  	}
  2283  	s.lock.Unlock()
  2284  
  2285  	// Reconstruct the partial tries from the response and verify them
  2286  	var (
  2287  		dbs    = make([]ethdb.KeyValueStore, len(hashes))
  2288  		tries  = make([]*trie.Trie, len(hashes))
  2289  		notary *trie.KeyValueNotary
  2290  		cont   bool
  2291  	)
  2292  	for i := 0; i < len(hashes); i++ {
  2293  		// Convert the keys and proofs into an internal format
  2294  		keys := make([][]byte, len(hashes[i]))
  2295  		for j, key := range hashes[i] {
  2296  			keys[j] = common.CopyBytes(key[:])
  2297  		}
  2298  		nodes := make(light.NodeList, 0, len(proof))
  2299  		if i == len(hashes)-1 {
  2300  			for _, node := range proof {
  2301  				nodes = append(nodes, node)
  2302  			}
  2303  		}
  2304  		var err error
  2305  		if len(nodes) == 0 {
  2306  			// No proof has been attached, the response must cover the entire key
  2307  			// space and hash to the origin root.
  2308  			dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
  2309  			if err != nil {
  2310  				s.scheduleRevertStorageRequest(req) // reschedule request
  2311  				logger.Warn("Storage slots failed proof", "err", err)
  2312  				return err
  2313  			}
  2314  		} else {
  2315  			// A proof was attached, the response is only partial, check that the
  2316  			// returned data is indeed part of the storage trie
  2317  			proofdb := nodes.NodeSet()
  2318  
  2319  			var end []byte
  2320  			if len(keys) > 0 {
  2321  				end = keys[len(keys)-1]
  2322  			}
  2323  			dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
  2324  			if err != nil {
  2325  				s.scheduleRevertStorageRequest(req) // reschedule request
  2326  				logger.Warn("Storage range failed proof", "err", err)
  2327  				return err
  2328  			}
  2329  		}
  2330  	}
  2331  	// Partial tries reconstructed, send them to the scheduler for storage filling
  2332  	bounds := make(map[common.Hash]struct{})
  2333  
  2334  	if notary != nil { // if all contract storages are delivered in full, no notary will be created
  2335  		it := notary.Accessed().NewIterator(nil, nil)
  2336  		for it.Next() {
  2337  			bounds[common.BytesToHash(it.Key())] = struct{}{}
  2338  		}
  2339  		it.Release()
  2340  	}
  2341  	response := &storageResponse{
  2342  		mainTask: req.mainTask,
  2343  		subTask:  req.subTask,
  2344  		accounts: req.accounts,
  2345  		roots:    req.roots,
  2346  		hashes:   hashes,
  2347  		slots:    slots,
  2348  		nodes:    dbs,
  2349  		tries:    tries,
  2350  		bounds:   bounds,
  2351  		overflow: light.NewNodeSet(),
  2352  		cont:     cont,
  2353  	}
  2354  	select {
  2355  	case s.storageResps <- response:
  2356  	case <-req.cancel:
  2357  	case <-req.stale:
  2358  	}
  2359  	return nil
  2360  }
  2361  
  2362  // OnTrieNodes is a callback method to invoke when a batch of trie nodes
  2363  // are received from a remote peer.
  2364  func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
  2365  	var size common.StorageSize
  2366  	for _, node := range trienodes {
  2367  		size += common.StorageSize(len(node))
  2368  	}
  2369  	logger := peer.Log().New("reqid", id)
  2370  	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)
  2371  
  2372  	// Whether or not the response is valid, we can mark the peer as idle and
  2373  	// notify the scheduler to assign a new task. If the response is invalid,
  2374  	// we'll drop the peer in a bit.
  2375  	s.lock.Lock()
  2376  	if _, ok := s.peers[peer.ID()]; ok {
  2377  		s.trienodeHealIdlers[peer.ID()] = struct{}{}
  2378  	}
  2379  	select {
  2380  	case s.update <- struct{}{}:
  2381  	default:
  2382  	}
  2383  	// Ensure the response is for a valid request
  2384  	req, ok := s.trienodeHealReqs[id]
  2385  	if !ok {
  2386  		// Request stale, perhaps the peer timed out but came through in the end
  2387  		logger.Warn("Unexpected trienode heal packet")
  2388  		s.lock.Unlock()
  2389  		return nil
  2390  	}
  2391  	delete(s.trienodeHealReqs, id)
  2392  
  2393  	// Clean up the request timeout timer, we'll see how to proceed further based
  2394  	// on the actual delivered content
  2395  	if !req.timeout.Stop() {
  2396  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2397  		s.lock.Unlock()
  2398  		return nil
  2399  	}
  2400  
  2401  	// Response is valid, but check if peer is signalling that it does not have
  2402  	// the requested data. For bytecode range queries that means the peer is not
  2403  	// yet synced.
  2404  	if len(trienodes) == 0 {
  2405  		logger.Debug("Peer rejected trienode heal request")
  2406  		s.statelessPeers[peer.ID()] = struct{}{}
  2407  		s.lock.Unlock()
  2408  
  2409  		// Signal this request as failed, and ready for rescheduling
  2410  		s.scheduleRevertTrienodeHealRequest(req)
  2411  		return nil
  2412  	}
  2413  	s.lock.Unlock()
  2414  
  2415  	// Cross reference the requested trienodes with the response to find gaps
  2416  	// that the serving node is missing
  2417  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2418  	hash := make([]byte, 32)
  2419  
  2420  	nodes := make([][]byte, len(req.hashes))
  2421  	for i, j := 0, 0; i < len(trienodes); i++ {
  2422  		// Find the next hash that we've been served, leaving misses with nils
  2423  		hasher.Reset()
  2424  		hasher.Write(trienodes[i])
  2425  		hasher.Read(hash)
  2426  
  2427  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2428  			j++
  2429  		}
  2430  		if j < len(req.hashes) {
  2431  			nodes[j] = trienodes[i]
  2432  			j++
  2433  			continue
  2434  		}
  2435  		// We've either ran out of hashes, or got unrequested data
  2436  		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
  2437  		// Signal this request as failed, and ready for rescheduling
  2438  		s.scheduleRevertTrienodeHealRequest(req)
  2439  		return errors.New("unexpected healing trienode")
  2440  	}
  2441  	// Response validated, send it to the scheduler for filling
  2442  	response := &trienodeHealResponse{
  2443  		task:   req.task,
  2444  		hashes: req.hashes,
  2445  		paths:  req.paths,
  2446  		nodes:  nodes,
  2447  	}
  2448  	select {
  2449  	case s.trienodeHealResps <- response:
  2450  	case <-req.cancel:
  2451  	case <-req.stale:
  2452  	}
  2453  	return nil
  2454  }
  2455  
  2456  // onHealByteCodes is a callback method to invoke when a batch of contract
  2457  // bytes codes are received from a remote peer in the healing phase.
  2458  func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2459  	var size common.StorageSize
  2460  	for _, code := range bytecodes {
  2461  		size += common.StorageSize(len(code))
  2462  	}
  2463  	logger := peer.Log().New("reqid", id)
  2464  	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2465  
  2466  	// Whether or not the response is valid, we can mark the peer as idle and
  2467  	// notify the scheduler to assign a new task. If the response is invalid,
  2468  	// we'll drop the peer in a bit.
  2469  	s.lock.Lock()
  2470  	if _, ok := s.peers[peer.ID()]; ok {
  2471  		s.bytecodeHealIdlers[peer.ID()] = struct{}{}
  2472  	}
  2473  	select {
  2474  	case s.update <- struct{}{}:
  2475  	default:
  2476  	}
  2477  	// Ensure the response is for a valid request
  2478  	req, ok := s.bytecodeHealReqs[id]
  2479  	if !ok {
  2480  		// Request stale, perhaps the peer timed out but came through in the end
  2481  		logger.Warn("Unexpected bytecode heal packet")
  2482  		s.lock.Unlock()
  2483  		return nil
  2484  	}
  2485  	delete(s.bytecodeHealReqs, id)
  2486  
  2487  	// Clean up the request timeout timer, we'll see how to proceed further based
  2488  	// on the actual delivered content
  2489  	if !req.timeout.Stop() {
  2490  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2491  		s.lock.Unlock()
  2492  		return nil
  2493  	}
  2494  
  2495  	// Response is valid, but check if peer is signalling that it does not have
  2496  	// the requested data. For bytecode range queries that means the peer is not
  2497  	// yet synced.
  2498  	if len(bytecodes) == 0 {
  2499  		logger.Debug("Peer rejected bytecode heal request")
  2500  		s.statelessPeers[peer.ID()] = struct{}{}
  2501  		s.lock.Unlock()
  2502  
  2503  		// Signal this request as failed, and ready for rescheduling
  2504  		s.scheduleRevertBytecodeHealRequest(req)
  2505  		return nil
  2506  	}
  2507  	s.lock.Unlock()
  2508  
  2509  	// Cross reference the requested bytecodes with the response to find gaps
  2510  	// that the serving node is missing
  2511  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2512  	hash := make([]byte, 32)
  2513  
  2514  	codes := make([][]byte, len(req.hashes))
  2515  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2516  		// Find the next hash that we've been served, leaving misses with nils
  2517  		hasher.Reset()
  2518  		hasher.Write(bytecodes[i])
  2519  		hasher.Read(hash)
  2520  
  2521  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2522  			j++
  2523  		}
  2524  		if j < len(req.hashes) {
  2525  			codes[j] = bytecodes[i]
  2526  			j++
  2527  			continue
  2528  		}
  2529  		// We've either ran out of hashes, or got unrequested data
  2530  		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
  2531  		// Signal this request as failed, and ready for rescheduling
  2532  		s.scheduleRevertBytecodeHealRequest(req)
  2533  		return errors.New("unexpected healing bytecode")
  2534  	}
  2535  	// Response validated, send it to the scheduler for filling
  2536  	response := &bytecodeHealResponse{
  2537  		task:   req.task,
  2538  		hashes: req.hashes,
  2539  		codes:  codes,
  2540  	}
  2541  	select {
  2542  	case s.bytecodeHealResps <- response:
  2543  	case <-req.cancel:
  2544  	case <-req.stale:
  2545  	}
  2546  	return nil
  2547  }
  2548  
  2549  // hashSpace is the total size of the 256 bit hash space for accounts.
  2550  var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
  2551  
  2552  // report calculates various status reports and provides it to the user.
  2553  func (s *Syncer) report(force bool) {
  2554  	if len(s.tasks) > 0 {
  2555  		s.reportSyncProgress(force)
  2556  		return
  2557  	}
  2558  	s.reportHealProgress(force)
  2559  }
  2560  
  2561  // reportSyncProgress calculates various status reports and provides it to the user.
  2562  func (s *Syncer) reportSyncProgress(force bool) {
  2563  	// Don't report all the events, just occasionally
  2564  	if !force && time.Since(s.logTime) < 3*time.Second {
  2565  		return
  2566  	}
  2567  	// Don't report anything until we have a meaningful progress
  2568  	synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
  2569  	if synced == 0 {
  2570  		return
  2571  	}
  2572  	accountGaps := new(big.Int)
  2573  	for _, task := range s.tasks {
  2574  		accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
  2575  	}
  2576  	accountFills := new(big.Int).Sub(hashSpace, accountGaps)
  2577  	if accountFills.BitLen() == 0 {
  2578  		return
  2579  	}
  2580  	s.logTime = time.Now()
  2581  	estBytes := float64(new(big.Int).Div(
  2582  		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
  2583  		accountFills,
  2584  	).Uint64())
  2585  
  2586  	elapsed := time.Since(s.startTime)
  2587  	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
  2588  
  2589  	// Create a mega progress report
  2590  	var (
  2591  		progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
  2592  		accounts = fmt.Sprintf("%d@%v", s.accountSynced, s.accountBytes.TerminalString())
  2593  		storage  = fmt.Sprintf("%d@%v", s.storageSynced, s.storageBytes.TerminalString())
  2594  		bytecode = fmt.Sprintf("%d@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString())
  2595  	)
  2596  	log.Info("State sync in progress", "synced", progress, "state", synced,
  2597  		"accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
  2598  }
  2599  
  2600  // reportHealProgress calculates various status reports and provides it to the user.
  2601  func (s *Syncer) reportHealProgress(force bool) {
  2602  	// Don't report all the events, just occasionally
  2603  	if !force && time.Since(s.logTime) < 3*time.Second {
  2604  		return
  2605  	}
  2606  	s.logTime = time.Now()
  2607  
  2608  	// Create a mega progress report
  2609  	var (
  2610  		trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString())
  2611  		bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString())
  2612  	)
  2613  	log.Info("State heal in progress", "nodes", trienode, "codes", bytecode,
  2614  		"pending", s.healer.scheduler.Pending())
  2615  }