github.com/cryptogateway/go-paymex@v0.0.0-20210204174735-96277fb1e602/eth/protocols/snap/sync.go 1 // Copyright 2020 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package snap 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "errors" 23 "fmt" 24 "math/big" 25 "math/rand" 26 "sync" 27 "time" 28 29 "github.com/cryptogateway/go-paymex/common" 30 "github.com/cryptogateway/go-paymex/core/rawdb" 31 "github.com/cryptogateway/go-paymex/core/state" 32 "github.com/cryptogateway/go-paymex/crypto" 33 "github.com/cryptogateway/go-paymex/ethdb" 34 "github.com/cryptogateway/go-paymex/event" 35 "github.com/cryptogateway/go-paymex/light" 36 "github.com/cryptogateway/go-paymex/log" 37 "github.com/cryptogateway/go-paymex/rlp" 38 "github.com/cryptogateway/go-paymex/trie" 39 "golang.org/x/crypto/sha3" 40 ) 41 42 var ( 43 // emptyRoot is the known root hash of an empty trie. 44 emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") 45 46 // emptyCode is the known hash of the empty EVM bytecode. 47 emptyCode = crypto.Keccak256Hash(nil) 48 ) 49 50 const ( 51 // maxRequestSize is the maximum number of bytes to request from a remote peer. 52 maxRequestSize = 512 * 1024 53 54 // maxStorageSetRequestCount is the maximum number of contracts to request the 55 // storage of in a single query. If this number is too low, we're not filling 56 // responses fully and waste round trip times. If it's too high, we're capping 57 // responses and waste bandwidth. 58 maxStorageSetRequestCount = maxRequestSize / 1024 59 60 // maxCodeRequestCount is the maximum number of bytecode blobs to request in a 61 // single query. If this number is too low, we're not filling responses fully 62 // and waste round trip times. If it's too high, we're capping responses and 63 // waste bandwidth. 64 // 65 // Deployed bytecodes are currently capped at 24KB, so the minimum request 66 // size should be maxRequestSize / 24K. Assuming that most contracts do not 67 // come close to that, requesting 4x should be a good approximation. 68 maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 69 70 // maxTrieRequestCount is the maximum number of trie node blobs to request in 71 // a single query. If this number is too low, we're not filling responses fully 72 // and waste round trip times. If it's too high, we're capping responses and 73 // waste bandwidth. 74 maxTrieRequestCount = 512 75 76 // accountConcurrency is the number of chunks to split the account trie into 77 // to allow concurrent retrievals. 78 accountConcurrency = 16 79 80 // storageConcurrency is the number of chunks to split a large contract 81 // storage trie into to allow concurrent retrievals.
82 storageConcurrency = 16 83 ) 84 85 var ( 86 // requestTimeout is the maximum time a peer is allowed to spend on serving 87 // a single network request. 88 requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync? 89 ) 90 91 // accountRequest tracks a pending account range request to ensure responses are 92 // to actual requests and to validate any security constraints. 93 // 94 // Concurrency note: account requests and responses are handled concurrently from 95 // the main runloop to allow Merkle proof verifications on the peer's thread and 96 // to drop on invalid response. The request struct must contain all the data to 97 // construct the response without accessing runloop internals (i.e. task). That 98 // is only included to allow the runloop to match a response to the task being 99 // synced without having yet another set of maps. 100 type accountRequest struct { 101 peer string // Peer to which this request is assigned 102 id uint64 // Request ID of this request 103 104 cancel chan struct{} // Channel to track sync cancellation 105 timeout *time.Timer // Timer to track delivery timeout 106 stale chan struct{} // Channel to signal the request was dropped 107 108 origin common.Hash // First account requested to allow continuation checks 109 limit common.Hash // Last account requested to allow non-overlapping chunking 110 111 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 112 } 113 114 // accountResponse is an already Merkle-verified remote response to an account 115 // range request. It contains the subtrie for the requested account range and 116 // the database that's going to be filled with the internal nodes on commit. 117 type accountResponse struct { 118 task *accountTask // Task which this request is filling 119 120 hashes []common.Hash // Account hashes in the returned range 121 accounts []*state.Account // Expanded accounts in the returned range 122 123 nodes ethdb.KeyValueStore // Database containing the reconstructed trie nodes 124 trie *trie.Trie // Reconstructed trie to reject incomplete account paths 125 126 bounds map[common.Hash]struct{} // Boundary nodes to avoid persisting incomplete accounts 127 overflow *light.NodeSet // Overflow nodes to avoid persisting across chunk boundaries 128 129 cont bool // Whether the account range has a continuation 130 } 131 132 // bytecodeRequest tracks a pending bytecode request to ensure responses are to 133 // actual requests and to validate any security constraints. 134 // 135 // Concurrency note: bytecode requests and responses are handled concurrently from 136 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 137 // to drop on invalid response. The request struct must contain all the data to 138 // construct the response without accessing runloop internals (i.e. task). That 139 // is only included to allow the runloop to match a response to the task being 140 // synced without having yet another set of maps. 141 type bytecodeRequest struct { 142 peer string // Peer to which this request is assigned 143 id uint64 // Request ID of this request 144 145 cancel chan struct{} // Channel to track sync cancellation 146 timeout *time.Timer // Timer to track delivery timeout 147 stale chan struct{} // Channel to signal the request was dropped 148 149 hashes []common.Hash // Bytecode hashes to validate responses 150 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 
151 } 152 153 // bytecodeResponse is an already verified remote response to a bytecode request. 154 type bytecodeResponse struct { 155 task *accountTask // Task which this request is filling 156 157 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 158 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 159 } 160 161 // storageRequest tracks a pending storage ranges request to ensure responses are 162 // to actual requests and to validate any security constraints. 163 // 164 // Concurrency note: storage requests and responses are handled concurrently from 165 // the main runloop to allow Merkle proof verifications on the peer's thread and 166 // to drop on invalid response. The request struct must contain all the data to 167 // construct the response without accessing runloop internals (i.e. tasks). That 168 // is only included to allow the runloop to match a response to the task being 169 // synced without having yet another set of maps. 170 type storageRequest struct { 171 peer string // Peer to which this request is assigned 172 id uint64 // Request ID of this request 173 174 cancel chan struct{} // Channel to track sync cancellation 175 timeout *time.Timer // Timer to track delivery timeout 176 stale chan struct{} // Channel to signal the request was dropped 177 178 accounts []common.Hash // Account hashes to validate responses 179 roots []common.Hash // Storage roots to validate responses 180 181 origin common.Hash // First storage slot requested to allow continuation checks 182 limit common.Hash // Last storage slot requested to allow non-overlapping chunking 183 184 mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!) 185 subTask *storageTask // Task which this response is filling (only access fields through the runloop!!) 186 } 187 188 // storageResponse is an already Merkle-verified remote response to a storage 189 // range request. It contains the subtries for the requested storage ranges and 190 // the databases that are going to be filled with the internal nodes on commit. 191 type storageResponse struct { 192 mainTask *accountTask // Task which this response belongs to 193 subTask *storageTask // Task which this response is filling 194 195 accounts []common.Hash // Account hashes requested, may be only partially filled 196 roots []common.Hash // Storage roots requested, may be only partially filled 197 198 hashes [][]common.Hash // Storage slot hashes in the returned range 199 slots [][][]byte // Storage slot values in the returned range 200 nodes []ethdb.KeyValueStore // Database containing the reconstructed trie nodes 201 tries []*trie.Trie // Reconstructed tries to reject overflown slots 202 203 // Fields relevant for the last account only 204 bounds map[common.Hash]struct{} // Boundary nodes to avoid persisting (incomplete) 205 overflow *light.NodeSet // Overflow nodes to avoid persisting across chunk boundaries 206 cont bool // Whether the last storage range has a continuation 207 } 208
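// The parallel slices in storageResponse line up index by index: accounts[i] and
// roots[i] describe the i-th requested contract, while hashes[i][j] and slots[i][j]
// are the j-th slot returned for it; hashes/slots may be shorter than accounts if
// the response was cut short. A minimal illustrative sketch (editor's addition,
// not part of the original file; the function name is hypothetical):
func walkStorageResponse(res *storageResponse) {
	for i := range res.hashes { // never longer than res.accounts
		account := res.accounts[i]
		for j := range res.hashes[i] {
			_ = res.hashes[i][j] // hash of the storage slot key
			_ = res.slots[i][j]  // slot value bytes as delivered by the peer
		}
		log.Trace("Walked storage chunk", "account", account, "slots", len(res.hashes[i]))
	}
}
209 // trienodeHealRequest tracks a pending state trie request to ensure responses 210 // are to actual requests and to validate any security constraints. 211 // 212 // Concurrency note: trie node requests and responses are handled concurrently from 213 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 214 // to drop on invalid response. The request struct must contain all the data to 215 // construct the response without accessing runloop internals (i.e. task).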
That 216 // is only included to allow the runloop to match a response to the task being 217 // synced without having yet another set of maps. 218 type trienodeHealRequest struct { 219 peer string // Peer to which this request is assigned 220 id uint64 // Request ID of this request 221 222 cancel chan struct{} // Channel to track sync cancellation 223 timeout *time.Timer // Timer to track delivery timeout 224 stale chan struct{} // Channel to signal the request was dropped 225 226 hashes []common.Hash // Trie node hashes to validate responses 227 paths []trie.SyncPath // Trie node paths requested for rescheduling 228 229 task *healTask // Task which this request is filling (only access fields through the runloop!!) 230 } 231 232 // trienodeHealResponse is an already verified remote response to a trie node request. 233 type trienodeHealResponse struct { 234 task *healTask // Task which this request is filling 235 236 hashes []common.Hash // Hashes of the trie nodes to avoid double hashing 237 paths []trie.SyncPath // Trie node paths requested for rescheduling missing ones 238 nodes [][]byte // Actual trie nodes to store into the database (nil = missing) 239 } 240 241 // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to 242 // actual requests and to validate any security constraints. 243 // 244 // Concurrency note: bytecode requests and responses are handled concurrently from 245 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 246 // to drop on invalid response. The request struct must contain all the data to 247 // construct the response without accessing runloop internals (i.e. task). That 248 // is only included to allow the runloop to match a response to the task being 249 // synced without having yet another set of maps. 250 type bytecodeHealRequest struct { 251 peer string // Peer to which this request is assigned 252 id uint64 // Request ID of this request 253 254 cancel chan struct{} // Channel to track sync cancellation 255 timeout *time.Timer // Timer to track delivery timeout 256 stale chan struct{} // Channel to signal the request was dropped 257 258 hashes []common.Hash // Bytecode hashes to validate responses 259 task *healTask // Task which this request is filling (only access fields through the runloop!!) 260 } 261 262 // bytecodeHealResponse is an already verified remote response to a bytecode request. 263 type bytecodeHealResponse struct { 264 task *healTask // Task which this request is filling 265 266 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 267 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 268 } 269 270 // accountTask represents the sync task for a chunk of the account snapshot. 
271 type accountTask struct { 272 // These fields get serialized to leveldb on shutdown 273 Next common.Hash // Next account to sync in this interval 274 Last common.Hash // Last account to sync in this interval 275 SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts 276 277 // These fields are internals used during runtime 278 req *accountRequest // Pending request to fill this task 279 res *accountResponse // Validated response filling this task 280 pend int // Number of pending subtasks for this round 281 282 needCode []bool // Flags whether the filling accounts need code retrieval 283 needState []bool // Flags whether the filling accounts need storage retrieval 284 needHeal []bool // Flags whether the filling accounts' state was chunked and needs healing 285 286 codeTasks map[common.Hash]struct{} // Code hashes that need retrieval 287 stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval 288 289 done bool // Flag whether the task can be removed 290 } 291 292 // storageTask represents the sync task for a chunk of the storage snapshot. 293 type storageTask struct { 294 Next common.Hash // Next storage slot to sync in this interval 295 Last common.Hash // Last storage slot to sync in this interval 296 297 // These fields are internals used during runtime 298 root common.Hash // Storage root hash for this instance 299 req *storageRequest // Pending request to fill this task 300 done bool // Flag whether the task can be removed 301 } 302 303 // healTask represents the sync task for healing the snap-synced chunk boundaries. 304 type healTask struct { 305 scheduler *trie.Sync // State trie sync scheduler defining the tasks 306 307 trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval 308 codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval 309 } 310
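// Only the exported fields of accountTask and storageTask above survive a shutdown:
// the suspended tasks are persisted as JSON (see syncProgress and saveSyncStatus
// below), and encoding/json silently drops the unexported runtime state, so a
// resumed task always comes back idle. A minimal illustrative sketch (editor's
// addition, not part of the original file; the function name and hash value are
// arbitrary):
func exampleTaskRoundTrip() {
	task := &accountTask{
		Next:     common.Hash{},
		Last:     common.HexToHash("0x0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
		SubTasks: make(map[common.Hash][]*storageTask),
	}
	blob, err := json.Marshal(task)
	if err != nil {
		log.Error("Failed to marshal task", "err", err)
		return
	}
	var restored accountTask
	if err := json.Unmarshal(blob, &restored); err != nil {
		log.Error("Failed to unmarshal task", "err", err)
		return
	}
	// restored.Next, restored.Last and restored.SubTasks match the original, while
	// the unexported req/res/pend fields are back to their zero values.
}
311 // syncProgress is a database entry to allow suspending and resuming a snapshot state 312 // sync. As opposed to full and fast sync, there is no way to restart a suspended 313 // snap sync without prior knowledge of the suspension point.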
314 type syncProgress struct { 315 Tasks []*accountTask // The suspended account tasks (contract tasks within) 316 317 // Status report during syncing phase 318 AccountSynced uint64 // Number of accounts downloaded 319 AccountBytes common.StorageSize // Number of account trie bytes persisted to disk 320 BytecodeSynced uint64 // Number of bytecodes downloaded 321 BytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 322 StorageSynced uint64 // Number of storage slots downloaded 323 StorageBytes common.StorageSize // Number of storage trie bytes persisted to disk 324 325 // Status report during healing phase 326 TrienodeHealSynced uint64 // Number of state trie nodes downloaded 327 TrienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 328 TrienodeHealDups uint64 // Number of state trie nodes already processed 329 TrienodeHealNops uint64 // Number of state trie nodes not requested 330 BytecodeHealSynced uint64 // Number of bytecodes downloaded 331 BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 332 BytecodeHealDups uint64 // Number of bytecodes already processed 333 BytecodeHealNops uint64 // Number of bytecodes not requested 334 } 335 336 // SyncPeer abstracts out the methods required for a peer to be synced against 337 // with the goal of allowing the construction of mock peers without the full 338 // blown networking. 339 type SyncPeer interface { 340 // ID retrieves the peer's unique identifier. 341 ID() string 342 343 // RequestAccountRange fetches a batch of accounts rooted in a specific account 344 // trie, starting with the origin. 345 RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error 346 347 // RequestStorageRanges fetches a batch of storage slots belonging to one or 348 // more accounts. If slots from only one account are requested, an origin marker 349 // may also be used to retrieve from there. 350 RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error 351 352 // RequestByteCodes fetches a batch of bytecodes by hash. 353 RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error 354 355 // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in 356 // a specific state trie. 357 RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error 358 359 // Log retrieves the peer's own contextual logger. 360 Log() log.Logger 361 } 362
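// The SyncPeer interface above exists so tests can drive the syncer with mock
// peers instead of real networking. Below is a minimal no-op mock (editor's
// addition, not part of the original file; the type name is hypothetical): every
// request method reports success but never delivers data, so a real test double
// would additionally feed canned responses back into the Syncer after wiring it
// up via NewSyncer and Register.
type nopSyncPeer struct {
	id string
}

func (p *nopSyncPeer) ID() string { return p.id }

func (p *nopSyncPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
	return nil // pretend the request was sent; nothing will ever be delivered
}

func (p *nopSyncPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) Log() log.Logger { return log.New("peer", p.id) }
363 // Syncer is an Ethereum account and storage trie syncer based on snapshots and 364 // the snap protocol. Its purpose is to download all the accounts and storage 365 // slots from remote peers and reassemble chunks of the state trie, on top of 366 // which a state sync can be run to fix any gaps / overlaps.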
367 // 368 // Every network request has a variety of failure events: 369 // - The peer disconnects after task assignment, failing to send the request 370 // - The peer disconnects after sending the request, before delivering on it 371 // - The peer remains connected, but does not deliver a response in time 372 // - The peer delivers a stale response after a previous timeout 373 // - The peer delivers a refusal to serve the requested state 374 type Syncer struct { 375 db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) 376 bloom *trie.SyncBloom // Bloom filter to deduplicate nodes for state fixup 377 378 root common.Hash // Current state trie root being synced 379 tasks []*accountTask // Current account task set being synced 380 snapped bool // Flag to signal that snap phase is done 381 healer *healTask // Current state healing task being executed 382 update chan struct{} // Notification channel for possible sync progression 383 384 peers map[string]SyncPeer // Currently active peers to download from 385 peerJoin *event.Feed // Event feed to react to peers joining 386 peerDrop *event.Feed // Event feed to react to peers dropping 387 388 // Request tracking during syncing phase 389 statelessPeers map[string]struct{} // Peers that failed to deliver state data 390 accountIdlers map[string]struct{} // Peers that aren't serving account requests 391 bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests 392 storageIdlers map[string]struct{} // Peers that aren't serving storage requests 393 394 accountReqs map[uint64]*accountRequest // Account requests currently running 395 bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running 396 storageReqs map[uint64]*storageRequest // Storage requests currently running 397 398 accountReqFails chan *accountRequest // Failed account range requests to revert 399 bytecodeReqFails chan *bytecodeRequest // Failed bytecode requests to revert 400 storageReqFails chan *storageRequest // Failed storage requests to revert 401 402 accountResps chan *accountResponse // Account sub-tries to integrate into the database 403 bytecodeResps chan *bytecodeResponse // Bytecodes to integrate into the database 404 storageResps chan *storageResponse // Storage sub-tries to integrate into the database 405 406 accountSynced uint64 // Number of accounts downloaded 407 accountBytes common.StorageSize // Number of account trie bytes persisted to disk 408 bytecodeSynced uint64 // Number of bytecodes downloaded 409 bytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 410 storageSynced uint64 // Number of storage slots downloaded 411 storageBytes common.StorageSize // Number of storage trie bytes persisted to disk 412 413 // Request tracking during healing phase 414 trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests 415 bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests 416 417 trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running 418 bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running 419 420 trienodeHealReqFails chan *trienodeHealRequest // Failed trienode requests to revert 421 bytecodeHealReqFails chan *bytecodeHealRequest // Failed bytecode requests to revert 422 423 trienodeHealResps chan *trienodeHealResponse // Trie nodes to integrate into the database 424 bytecodeHealResps chan *bytecodeHealResponse // Bytecodes to integrate into the database 425 
426 trienodeHealSynced uint64 // Number of state trie nodes downloaded 427 trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 428 trienodeHealDups uint64 // Number of state trie nodes already processed 429 trienodeHealNops uint64 // Number of state trie nodes not requested 430 bytecodeHealSynced uint64 // Number of bytecodes downloaded 431 bytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 432 bytecodeHealDups uint64 // Number of bytecodes already processed 433 bytecodeHealNops uint64 // Number of bytecodes not requested 434 435 startTime time.Time // Time instance when snapshot sync started 436 startAcc common.Hash // Account hash where sync started from 437 logTime time.Time // Time instance when status was last reported 438 439 pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown 440 lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) 441 } 442 443 // NewSyncer creates a new snapshot syncer to download the Ethereum state over the 444 // snap protocol. 445 func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { 446 return &Syncer{ 447 db: db, 448 bloom: bloom, 449 450 peers: make(map[string]SyncPeer), 451 peerJoin: new(event.Feed), 452 peerDrop: new(event.Feed), 453 update: make(chan struct{}, 1), 454 455 accountIdlers: make(map[string]struct{}), 456 storageIdlers: make(map[string]struct{}), 457 bytecodeIdlers: make(map[string]struct{}), 458 459 accountReqs: make(map[uint64]*accountRequest), 460 storageReqs: make(map[uint64]*storageRequest), 461 bytecodeReqs: make(map[uint64]*bytecodeRequest), 462 accountReqFails: make(chan *accountRequest), 463 storageReqFails: make(chan *storageRequest), 464 bytecodeReqFails: make(chan *bytecodeRequest), 465 accountResps: make(chan *accountResponse), 466 storageResps: make(chan *storageResponse), 467 bytecodeResps: make(chan *bytecodeResponse), 468 469 trienodeHealIdlers: make(map[string]struct{}), 470 bytecodeHealIdlers: make(map[string]struct{}), 471 472 trienodeHealReqs: make(map[uint64]*trienodeHealRequest), 473 bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), 474 trienodeHealReqFails: make(chan *trienodeHealRequest), 475 bytecodeHealReqFails: make(chan *bytecodeHealRequest), 476 trienodeHealResps: make(chan *trienodeHealResponse), 477 bytecodeHealResps: make(chan *bytecodeHealResponse), 478 } 479 } 480 481 // Register injects a new data source into the syncer's peerset. 482 func (s *Syncer) Register(peer SyncPeer) error { 483 // Make sure the peer is not registered yet 484 id := peer.ID() 485 486 s.lock.Lock() 487 if _, ok := s.peers[id]; ok { 488 log.Error("Snap peer already registered", "id", id) 489 490 s.lock.Unlock() 491 return errors.New("already registered") 492 } 493 s.peers[id] = peer 494 495 // Mark the peer as idle, even if no sync is running 496 s.accountIdlers[id] = struct{}{} 497 s.storageIdlers[id] = struct{}{} 498 s.bytecodeIdlers[id] = struct{}{} 499 s.trienodeHealIdlers[id] = struct{}{} 500 s.bytecodeHealIdlers[id] = struct{}{} 501 s.lock.Unlock() 502 503 // Notify any active syncs that a new peer can be assigned data 504 s.peerJoin.Send(id) 505 return nil 506 } 507 508 // Unregister removes a data source from the syncer's peerset.
509 func (s *Syncer) Unregister(id string) error { 510 // Remove all traces of the peer from the registry 511 s.lock.Lock() 512 if _, ok := s.peers[id]; !ok { 513 log.Error("Snap peer not registered", "id", id) 514 515 s.lock.Unlock() 516 return errors.New("not registered") 517 } 518 delete(s.peers, id) 519 520 // Remove status markers, even if no sync is running 521 delete(s.statelessPeers, id) 522 523 delete(s.accountIdlers, id) 524 delete(s.storageIdlers, id) 525 delete(s.bytecodeIdlers, id) 526 delete(s.trienodeHealIdlers, id) 527 delete(s.bytecodeHealIdlers, id) 528 s.lock.Unlock() 529 530 // Notify any active syncs that pending requests need to be reverted 531 s.peerDrop.Send(id) 532 return nil 533 } 534 535 // Sync starts (or resumes a previous) sync cycle to iterate over an state trie 536 // with the given root and reconstruct the nodes based on the snapshot leaves. 537 // Previously downloaded segments will not be redownloaded of fixed, rather any 538 // errors will be healed after the leaves are fully accumulated. 539 func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { 540 // Move the trie root from any previous value, revert stateless markers for 541 // any peers and initialize the syncer if it was not yet run 542 s.lock.Lock() 543 s.root = root 544 s.healer = &healTask{ 545 scheduler: state.NewStateSync(root, s.db, s.bloom), 546 trieTasks: make(map[common.Hash]trie.SyncPath), 547 codeTasks: make(map[common.Hash]struct{}), 548 } 549 s.statelessPeers = make(map[string]struct{}) 550 s.lock.Unlock() 551 552 if s.startTime == (time.Time{}) { 553 s.startTime = time.Now() 554 } 555 // Retrieve the previous sync status from LevelDB and abort if already synced 556 s.loadSyncStatus() 557 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 558 log.Debug("Snapshot sync already completed") 559 return nil 560 } 561 defer func() { // Persist any progress, independent of failure 562 for _, task := range s.tasks { 563 s.forwardAccountTask(task) 564 } 565 s.cleanAccountTasks() 566 s.saveSyncStatus() 567 }() 568 569 log.Debug("Starting snapshot sync cycle", "root", root) 570 defer s.report(true) 571 572 // Whether sync completed or not, disregard any future packets 573 defer func() { 574 log.Debug("Terminating snapshot sync cycle", "root", root) 575 s.lock.Lock() 576 s.accountReqs = make(map[uint64]*accountRequest) 577 s.storageReqs = make(map[uint64]*storageRequest) 578 s.bytecodeReqs = make(map[uint64]*bytecodeRequest) 579 s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest) 580 s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest) 581 s.lock.Unlock() 582 }() 583 // Keep scheduling sync tasks 584 peerJoin := make(chan string, 16) 585 peerJoinSub := s.peerJoin.Subscribe(peerJoin) 586 defer peerJoinSub.Unsubscribe() 587 588 peerDrop := make(chan string, 16) 589 peerDropSub := s.peerDrop.Subscribe(peerDrop) 590 defer peerDropSub.Unsubscribe() 591 592 for { 593 // Remove all completed tasks and terminate sync if everything's done 594 s.cleanStorageTasks() 595 s.cleanAccountTasks() 596 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 597 return nil 598 } 599 // Assign all the data retrieval tasks to any free peers 600 s.assignAccountTasks(cancel) 601 s.assignBytecodeTasks(cancel) 602 s.assignStorageTasks(cancel) 603 604 if len(s.tasks) == 0 { 605 // Sync phase done, run heal phase 606 s.assignTrienodeHealTasks(cancel) 607 s.assignBytecodeHealTasks(cancel) 608 } 609 // Wait for something to happen 610 select { 611 case <-s.update: 612 // Something 
happened (new peer, delivery, timeout), recheck tasks 613 case <-peerJoin: 614 // A new peer joined, try to schedule it new tasks 615 case id := <-peerDrop: 616 s.revertRequests(id) 617 case <-cancel: 618 return errCancelled 619 620 case req := <-s.accountReqFails: 621 s.revertAccountRequest(req) 622 case req := <-s.bytecodeReqFails: 623 s.revertBytecodeRequest(req) 624 case req := <-s.storageReqFails: 625 s.revertStorageRequest(req) 626 case req := <-s.trienodeHealReqFails: 627 s.revertTrienodeHealRequest(req) 628 case req := <-s.bytecodeHealReqFails: 629 s.revertBytecodeHealRequest(req) 630 631 case res := <-s.accountResps: 632 s.processAccountResponse(res) 633 case res := <-s.bytecodeResps: 634 s.processBytecodeResponse(res) 635 case res := <-s.storageResps: 636 s.processStorageResponse(res) 637 case res := <-s.trienodeHealResps: 638 s.processTrienodeHealResponse(res) 639 case res := <-s.bytecodeHealResps: 640 s.processBytecodeHealResponse(res) 641 } 642 // Report stats if something meaningful happened 643 s.report(false) 644 } 645 } 646 647 // loadSyncStatus retrieves a previously aborted sync status from the database, 648 // or generates a fresh one if none is available. 649 func (s *Syncer) loadSyncStatus() { 650 var progress syncProgress 651 652 if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { 653 if err := json.Unmarshal(status, &progress); err != nil { 654 log.Error("Failed to decode snap sync status", "err", err) 655 } else { 656 for _, task := range progress.Tasks { 657 log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) 658 } 659 s.tasks = progress.Tasks 660 s.snapped = len(s.tasks) == 0 661 662 s.accountSynced = progress.AccountSynced 663 s.accountBytes = progress.AccountBytes 664 s.bytecodeSynced = progress.BytecodeSynced 665 s.bytecodeBytes = progress.BytecodeBytes 666 s.storageSynced = progress.StorageSynced 667 s.storageBytes = progress.StorageBytes 668 669 s.trienodeHealSynced = progress.TrienodeHealSynced 670 s.trienodeHealBytes = progress.TrienodeHealBytes 671 s.bytecodeHealSynced = progress.BytecodeHealSynced 672 s.bytecodeHealBytes = progress.BytecodeHealBytes 673 return 674 } 675 } 676 // Either we've failed to decode the previus state, or there was none. 677 // Start a fresh sync by chunking up the account range and scheduling 678 // them for retrieval. 679 s.tasks = nil 680 s.accountSynced, s.accountBytes = 0, 0 681 s.bytecodeSynced, s.bytecodeBytes = 0, 0 682 s.storageSynced, s.storageBytes = 0, 0 683 s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 684 s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 685 686 var next common.Hash 687 step := new(big.Int).Sub( 688 new(big.Int).Div( 689 new(big.Int).Exp(common.Big2, common.Big256, nil), 690 big.NewInt(accountConcurrency), 691 ), common.Big1, 692 ) 693 for i := 0; i < accountConcurrency; i++ { 694 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 695 if i == accountConcurrency-1 { 696 // Make sure we don't overflow if the step is not a proper divisor 697 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 698 } 699 s.tasks = append(s.tasks, &accountTask{ 700 Next: next, 701 Last: last, 702 SubTasks: make(map[common.Hash][]*storageTask), 703 }) 704 log.Debug("Created account sync task", "from", next, "last", last) 705 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 706 } 707 } 708 709 // saveSyncStatus marshals the remaining sync tasks into leveldb. 
710 func (s *Syncer) saveSyncStatus() { 711 progress := &syncProgress{ 712 Tasks: s.tasks, 713 AccountSynced: s.accountSynced, 714 AccountBytes: s.accountBytes, 715 BytecodeSynced: s.bytecodeSynced, 716 BytecodeBytes: s.bytecodeBytes, 717 StorageSynced: s.storageSynced, 718 StorageBytes: s.storageBytes, 719 TrienodeHealSynced: s.trienodeHealSynced, 720 TrienodeHealBytes: s.trienodeHealBytes, 721 BytecodeHealSynced: s.bytecodeHealSynced, 722 BytecodeHealBytes: s.bytecodeHealBytes, 723 } 724 status, err := json.Marshal(progress) 725 if err != nil { 726 panic(err) // This can only fail during implementation 727 } 728 rawdb.WriteSnapshotSyncStatus(s.db, status) 729 } 730 731 // cleanAccountTasks removes account range retrieval tasks that have already been 732 // completed. 733 func (s *Syncer) cleanAccountTasks() { 734 for i := 0; i < len(s.tasks); i++ { 735 if s.tasks[i].done { 736 s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 737 i-- 738 } 739 } 740 if len(s.tasks) == 0 { 741 s.lock.Lock() 742 s.snapped = true 743 s.lock.Unlock() 744 } 745 } 746 747 // cleanStorageTasks iterates over all the account tasks and storage sub-tasks 748 // within, cleaning any that have been completed. 749 func (s *Syncer) cleanStorageTasks() { 750 for _, task := range s.tasks { 751 for account, subtasks := range task.SubTasks { 752 // Remove storage range retrieval tasks that completed 753 for j := 0; j < len(subtasks); j++ { 754 if subtasks[j].done { 755 subtasks = append(subtasks[:j], subtasks[j+1:]...) 756 j-- 757 } 758 } 759 if len(subtasks) > 0 { 760 task.SubTasks[account] = subtasks 761 continue 762 } 763 // If all storage chunks are done, mark the account as done too 764 for j, hash := range task.res.hashes { 765 if hash == account { 766 task.needState[j] = false 767 } 768 } 769 delete(task.SubTasks, account) 770 task.pend-- 771 772 // If this was the last pending task, forward the account task 773 if task.pend == 0 { 774 s.forwardAccountTask(task) 775 } 776 } 777 } 778 } 779 780 // assignAccountTasks attempts to match idle peers to pending account range 781 // retrievals. 782 func (s *Syncer) assignAccountTasks(cancel chan struct{}) { 783 s.lock.Lock() 784 defer s.lock.Unlock() 785 786 // If there are no idle peers, short circuit assignment 787 if len(s.accountIdlers) == 0 { 788 return 789 } 790 // Iterate over all the tasks and try to find a pending one 791 for _, task := range s.tasks { 792 // Skip any tasks already filling 793 if task.req != nil || task.res != nil { 794 continue 795 } 796 // Task pending retrieval, try to find an idle peer. If no such peer 797 // exists, we probably assigned tasks for all (or they are stateless). 798 // Abort the entire assignment mechanism. 
799 var idle string 800 for id := range s.accountIdlers { 801 // If the peer rejected a query in this sync cycle, don't bother asking 802 // again for anything, it's either out of sync or already pruned 803 if _, ok := s.statelessPeers[id]; ok { 804 continue 805 } 806 idle = id 807 break 808 } 809 if idle == "" { 810 return 811 } 812 // Matched a pending task to an idle peer, allocate a unique request id 813 var reqid uint64 814 for { 815 reqid = uint64(rand.Int63()) 816 if reqid == 0 { 817 continue 818 } 819 if _, ok := s.accountReqs[reqid]; ok { 820 continue 821 } 822 break 823 } 824 // Generate the network query and send it to the peer 825 req := &accountRequest{ 826 peer: idle, 827 id: reqid, 828 cancel: cancel, 829 stale: make(chan struct{}), 830 origin: task.Next, 831 limit: task.Last, 832 task: task, 833 } 834 req.timeout = time.AfterFunc(requestTimeout, func() { 835 log.Debug("Account range request timed out") 836 s.scheduleRevertAccountRequest(req) 837 }) 838 s.accountReqs[reqid] = req 839 delete(s.accountIdlers, idle) 840 841 s.pend.Add(1) 842 go func(peer SyncPeer, root common.Hash) { 843 defer s.pend.Done() 844 845 // Attempt to send the remote request and revert if it fails 846 if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil { 847 peer.Log().Debug("Failed to request account range", "err", err) 848 s.scheduleRevertAccountRequest(req) 849 } 850 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 851 852 // Inject the request into the task to block further assignments 853 task.req = req 854 } 855 } 856 857 // assignBytecodeTasks attempts to match idle peers to pending code retrievals. 858 func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { 859 s.lock.Lock() 860 defer s.lock.Unlock() 861 862 // If there are no idle peers, short circuit assignment 863 if len(s.bytecodeIdlers) == 0 { 864 return 865 } 866 // Iterate over all the tasks and try to find a pending one 867 for _, task := range s.tasks { 868 // Skip any tasks not in the bytecode retrieval phase 869 if task.res == nil { 870 continue 871 } 872 // Skip tasks that are already retrieving (or done with) all codes 873 if len(task.codeTasks) == 0 { 874 continue 875 } 876 // Task pending retrieval, try to find an idle peer. If no such peer 877 // exists, we probably assigned tasks for all (or they are stateless). 878 // Abort the entire assignment mechanism. 
879 var idle string 880 for id := range s.bytecodeIdlers { 881 // If the peer rejected a query in this sync cycle, don't bother asking 882 // again for anything, it's either out of sync or already pruned 883 if _, ok := s.statelessPeers[id]; ok { 884 continue 885 } 886 idle = id 887 break 888 } 889 if idle == "" { 890 return 891 } 892 // Matched a pending task to an idle peer, allocate a unique request id 893 var reqid uint64 894 for { 895 reqid = uint64(rand.Int63()) 896 if reqid == 0 { 897 continue 898 } 899 if _, ok := s.bytecodeReqs[reqid]; ok { 900 continue 901 } 902 break 903 } 904 // Generate the network query and send it to the peer 905 hashes := make([]common.Hash, 0, maxCodeRequestCount) 906 for hash := range task.codeTasks { 907 delete(task.codeTasks, hash) 908 hashes = append(hashes, hash) 909 if len(hashes) >= maxCodeRequestCount { 910 break 911 } 912 } 913 req := &bytecodeRequest{ 914 peer: idle, 915 id: reqid, 916 cancel: cancel, 917 stale: make(chan struct{}), 918 hashes: hashes, 919 task: task, 920 } 921 req.timeout = time.AfterFunc(requestTimeout, func() { 922 log.Debug("Bytecode request timed out") 923 s.scheduleRevertBytecodeRequest(req) 924 }) 925 s.bytecodeReqs[reqid] = req 926 delete(s.bytecodeIdlers, idle) 927 928 s.pend.Add(1) 929 go func(peer SyncPeer) { 930 defer s.pend.Done() 931 932 // Attempt to send the remote request and revert if it fails 933 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 934 log.Debug("Failed to request bytecodes", "err", err) 935 s.scheduleRevertBytecodeRequest(req) 936 } 937 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 938 } 939 } 940 941 // assignStorageTasks attempts to match idle peers to pending storage range 942 // retrievals. 943 func (s *Syncer) assignStorageTasks(cancel chan struct{}) { 944 s.lock.Lock() 945 defer s.lock.Unlock() 946 947 // If there are no idle peers, short circuit assignment 948 if len(s.storageIdlers) == 0 { 949 return 950 } 951 // Iterate over all the tasks and try to find a pending one 952 for _, task := range s.tasks { 953 // Skip any tasks not in the storage retrieval phase 954 if task.res == nil { 955 continue 956 } 957 // Skip tasks that are already retrieving (or done with) all small states 958 if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { 959 continue 960 } 961 // Task pending retrieval, try to find an idle peer. If no such peer 962 // exists, we probably assigned tasks for all (or they are stateless). 963 // Abort the entire assignment mechanism. 964 var idle string 965 for id := range s.storageIdlers { 966 // If the peer rejected a query in this sync cycle, don't bother asking 967 // again for anything, it's either out of sync or already pruned 968 if _, ok := s.statelessPeers[id]; ok { 969 continue 970 } 971 idle = id 972 break 973 } 974 if idle == "" { 975 return 976 } 977 // Matched a pending task to an idle peer, allocate a unique request id 978 var reqid uint64 979 for { 980 reqid = uint64(rand.Int63()) 981 if reqid == 0 { 982 continue 983 } 984 if _, ok := s.storageReqs[reqid]; ok { 985 continue 986 } 987 break 988 } 989 // Generate the network query and send it to the peer. If there are 990 // large contract tasks pending, complete those before diving into 991 // even more new contracts. 
992 var ( 993 accounts = make([]common.Hash, 0, maxStorageSetRequestCount) 994 roots = make([]common.Hash, 0, maxStorageSetRequestCount) 995 subtask *storageTask 996 ) 997 for account, subtasks := range task.SubTasks { 998 for _, st := range subtasks { 999 // Skip any subtasks already filling 1000 if st.req != nil { 1001 continue 1002 } 1003 // Found an incomplete storage chunk, schedule it 1004 accounts = append(accounts, account) 1005 roots = append(roots, st.root) 1006 subtask = st 1007 break // Large contract chunks are downloaded individually 1008 } 1009 if subtask != nil { 1010 break // Large contract chunks are downloaded individually 1011 } 1012 } 1013 if subtask == nil { 1014 // No large contract required retrieval, but small ones available 1015 for acccount, root := range task.stateTasks { 1016 delete(task.stateTasks, acccount) 1017 1018 accounts = append(accounts, acccount) 1019 roots = append(roots, root) 1020 1021 if len(accounts) >= maxStorageSetRequestCount { 1022 break 1023 } 1024 } 1025 } 1026 // If nothing was found, it means this task is actually already fully 1027 // retrieving, but large contracts are hard to detect. Skip to the next. 1028 if len(accounts) == 0 { 1029 continue 1030 } 1031 req := &storageRequest{ 1032 peer: idle, 1033 id: reqid, 1034 cancel: cancel, 1035 stale: make(chan struct{}), 1036 accounts: accounts, 1037 roots: roots, 1038 mainTask: task, 1039 subTask: subtask, 1040 } 1041 if subtask != nil { 1042 req.origin = subtask.Next 1043 req.limit = subtask.Last 1044 } 1045 req.timeout = time.AfterFunc(requestTimeout, func() { 1046 log.Debug("Storage request timed out") 1047 s.scheduleRevertStorageRequest(req) 1048 }) 1049 s.storageReqs[reqid] = req 1050 delete(s.storageIdlers, idle) 1051 1052 s.pend.Add(1) 1053 go func(peer SyncPeer, root common.Hash) { 1054 defer s.pend.Done() 1055 1056 // Attempt to send the remote request and revert if it fails 1057 var origin, limit []byte 1058 if subtask != nil { 1059 origin, limit = req.origin[:], req.limit[:] 1060 } 1061 if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil { 1062 log.Debug("Failed to request storage", "err", err) 1063 s.scheduleRevertStorageRequest(req) 1064 } 1065 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1066 1067 // Inject the request into the subtask to block further assignments 1068 if subtask != nil { 1069 subtask.req = req 1070 } 1071 } 1072 } 1073 1074 // assignTrienodeHealTasks attempts to match idle peers to trie node requests to 1075 // heal any trie errors caused by the snap sync's chunked retrieval model. 1076 func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { 1077 s.lock.Lock() 1078 defer s.lock.Unlock() 1079 1080 // If there are no idle peers, short circuit assignment 1081 if len(s.trienodeHealIdlers) == 0 { 1082 return 1083 } 1084 // Iterate over pending tasks and try to find a peer to retrieve with 1085 for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1086 // If there are not enough trie tasks queued to fully assign, fill the 1087 // queue from the state sync scheduler. The trie synced schedules these 1088 // together with bytecodes, so we need to queue them combined. 
1089 var ( 1090 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1091 want = maxTrieRequestCount + maxCodeRequestCount 1092 ) 1093 if have < want { 1094 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1095 for i, hash := range nodes { 1096 s.healer.trieTasks[hash] = paths[i] 1097 } 1098 for _, hash := range codes { 1099 s.healer.codeTasks[hash] = struct{}{} 1100 } 1101 } 1102 // If all the heal tasks are bytecodes or already downloading, bail 1103 if len(s.healer.trieTasks) == 0 { 1104 return 1105 } 1106 // Task pending retrieval, try to find an idle peer. If no such peer 1107 // exists, we probably assigned tasks for all (or they are stateless). 1108 // Abort the entire assignment mechanism. 1109 var idle string 1110 for id := range s.trienodeHealIdlers { 1111 // If the peer rejected a query in this sync cycle, don't bother asking 1112 // again for anything, it's either out of sync or already pruned 1113 if _, ok := s.statelessPeers[id]; ok { 1114 continue 1115 } 1116 idle = id 1117 break 1118 } 1119 if idle == "" { 1120 return 1121 } 1122 // Matched a pending task to an idle peer, allocate a unique request id 1123 var reqid uint64 1124 for { 1125 reqid = uint64(rand.Int63()) 1126 if reqid == 0 { 1127 continue 1128 } 1129 if _, ok := s.trienodeHealReqs[reqid]; ok { 1130 continue 1131 } 1132 break 1133 } 1134 // Generate the network query and send it to the peer 1135 var ( 1136 hashes = make([]common.Hash, 0, maxTrieRequestCount) 1137 paths = make([]trie.SyncPath, 0, maxTrieRequestCount) 1138 pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount) 1139 ) 1140 for hash, pathset := range s.healer.trieTasks { 1141 delete(s.healer.trieTasks, hash) 1142 1143 hashes = append(hashes, hash) 1144 paths = append(paths, pathset) 1145 pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash 1146 1147 if len(hashes) >= maxTrieRequestCount { 1148 break 1149 } 1150 } 1151 req := &trienodeHealRequest{ 1152 peer: idle, 1153 id: reqid, 1154 cancel: cancel, 1155 stale: make(chan struct{}), 1156 hashes: hashes, 1157 paths: paths, 1158 task: s.healer, 1159 } 1160 req.timeout = time.AfterFunc(requestTimeout, func() { 1161 log.Debug("Trienode heal request timed out") 1162 s.scheduleRevertTrienodeHealRequest(req) 1163 }) 1164 s.trienodeHealReqs[reqid] = req 1165 delete(s.trienodeHealIdlers, idle) 1166 1167 s.pend.Add(1) 1168 go func(peer SyncPeer, root common.Hash) { 1169 defer s.pend.Done() 1170 1171 // Attempt to send the remote request and revert if it fails 1172 if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil { 1173 log.Debug("Failed to request trienode healers", "err", err) 1174 s.scheduleRevertTrienodeHealRequest(req) 1175 } 1176 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1177 } 1178 } 1179 1180 // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to 1181 // heal any trie errors caused by the snap sync's chunked retrieval model. 1182 func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { 1183 s.lock.Lock() 1184 defer s.lock.Unlock() 1185 1186 // If there are no idle peers, short circuit assignment 1187 if len(s.bytecodeHealIdlers) == 0 { 1188 return 1189 } 1190 // Iterate over pending tasks and try to find a peer to retrieve with 1191 for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1192 // If there are not enough trie tasks queued to fully assign, fill the 1193 // queue from the state sync scheduler. 
The trie synced schedules these 1194 // together with trie nodes, so we need to queue them combined. 1195 var ( 1196 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1197 want = maxTrieRequestCount + maxCodeRequestCount 1198 ) 1199 if have < want { 1200 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1201 for i, hash := range nodes { 1202 s.healer.trieTasks[hash] = paths[i] 1203 } 1204 for _, hash := range codes { 1205 s.healer.codeTasks[hash] = struct{}{} 1206 } 1207 } 1208 // If all the heal tasks are trienodes or already downloading, bail 1209 if len(s.healer.codeTasks) == 0 { 1210 return 1211 } 1212 // Task pending retrieval, try to find an idle peer. If no such peer 1213 // exists, we probably assigned tasks for all (or they are stateless). 1214 // Abort the entire assignment mechanism. 1215 var idle string 1216 for id := range s.bytecodeHealIdlers { 1217 // If the peer rejected a query in this sync cycle, don't bother asking 1218 // again for anything, it's either out of sync or already pruned 1219 if _, ok := s.statelessPeers[id]; ok { 1220 continue 1221 } 1222 idle = id 1223 break 1224 } 1225 if idle == "" { 1226 return 1227 } 1228 // Matched a pending task to an idle peer, allocate a unique request id 1229 var reqid uint64 1230 for { 1231 reqid = uint64(rand.Int63()) 1232 if reqid == 0 { 1233 continue 1234 } 1235 if _, ok := s.bytecodeHealReqs[reqid]; ok { 1236 continue 1237 } 1238 break 1239 } 1240 // Generate the network query and send it to the peer 1241 hashes := make([]common.Hash, 0, maxCodeRequestCount) 1242 for hash := range s.healer.codeTasks { 1243 delete(s.healer.codeTasks, hash) 1244 1245 hashes = append(hashes, hash) 1246 if len(hashes) >= maxCodeRequestCount { 1247 break 1248 } 1249 } 1250 req := &bytecodeHealRequest{ 1251 peer: idle, 1252 id: reqid, 1253 cancel: cancel, 1254 stale: make(chan struct{}), 1255 hashes: hashes, 1256 task: s.healer, 1257 } 1258 req.timeout = time.AfterFunc(requestTimeout, func() { 1259 log.Debug("Bytecode heal request timed out") 1260 s.scheduleRevertBytecodeHealRequest(req) 1261 }) 1262 s.bytecodeHealReqs[reqid] = req 1263 delete(s.bytecodeHealIdlers, idle) 1264 1265 s.pend.Add(1) 1266 go func(peer SyncPeer) { 1267 defer s.pend.Done() 1268 1269 // Attempt to send the remote request and revert if it fails 1270 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 1271 log.Debug("Failed to request bytecode healers", "err", err) 1272 s.scheduleRevertBytecodeHealRequest(req) 1273 } 1274 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 1275 } 1276 } 1277 1278 // revertRequests locates all the currently pending reuqests from a particular 1279 // peer and reverts them, rescheduling for others to fulfill. 
1280 func (s *Syncer) revertRequests(peer string) { 1281 // Gather the requests first, revertals need the lock too 1282 s.lock.Lock() 1283 var accountReqs []*accountRequest 1284 for _, req := range s.accountReqs { 1285 if req.peer == peer { 1286 accountReqs = append(accountReqs, req) 1287 } 1288 } 1289 var bytecodeReqs []*bytecodeRequest 1290 for _, req := range s.bytecodeReqs { 1291 if req.peer == peer { 1292 bytecodeReqs = append(bytecodeReqs, req) 1293 } 1294 } 1295 var storageReqs []*storageRequest 1296 for _, req := range s.storageReqs { 1297 if req.peer == peer { 1298 storageReqs = append(storageReqs, req) 1299 } 1300 } 1301 var trienodeHealReqs []*trienodeHealRequest 1302 for _, req := range s.trienodeHealReqs { 1303 if req.peer == peer { 1304 trienodeHealReqs = append(trienodeHealReqs, req) 1305 } 1306 } 1307 var bytecodeHealReqs []*bytecodeHealRequest 1308 for _, req := range s.bytecodeHealReqs { 1309 if req.peer == peer { 1310 bytecodeHealReqs = append(bytecodeHealReqs, req) 1311 } 1312 } 1313 s.lock.Unlock() 1314 1315 // Revert all the requests matching the peer 1316 for _, req := range accountReqs { 1317 s.revertAccountRequest(req) 1318 } 1319 for _, req := range bytecodeReqs { 1320 s.revertBytecodeRequest(req) 1321 } 1322 for _, req := range storageReqs { 1323 s.revertStorageRequest(req) 1324 } 1325 for _, req := range trienodeHealReqs { 1326 s.revertTrienodeHealRequest(req) 1327 } 1328 for _, req := range bytecodeHealReqs { 1329 s.revertBytecodeHealRequest(req) 1330 } 1331 } 1332 1333 // scheduleRevertAccountRequest asks the event loop to clean up an account range 1334 // request and return all failed retrieval tasks to the scheduler for reassignment. 1335 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { 1336 select { 1337 case s.accountReqFails <- req: 1338 // Sync event loop notified 1339 case <-req.cancel: 1340 // Sync cycle got cancelled 1341 case <-req.stale: 1342 // Request already reverted 1343 } 1344 } 1345 1346 // revertAccountRequest cleans up an account range request and returns all failed 1347 // retrieval tasks to the scheduler for reassignment. 1348 // 1349 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1350 // On peer threads, use scheduleRevertAccountRequest. 1351 func (s *Syncer) revertAccountRequest(req *accountRequest) { 1352 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) 1353 select { 1354 case <-req.stale: 1355 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id) 1356 return 1357 default: 1358 } 1359 close(req.stale) 1360 1361 // Remove the request from the tracked set 1362 s.lock.Lock() 1363 delete(s.accountReqs, req.id) 1364 s.lock.Unlock() 1365 1366 // If there's a timeout timer still running, abort it and mark the account 1367 // task as not-pending, ready for resheduling 1368 req.timeout.Stop() 1369 if req.task.req == req { 1370 req.task.req = nil 1371 } 1372 } 1373 1374 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request 1375 // and return all failed retrieval tasks to the scheduler for reassignment. 
1376 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1377 select { 1378 case s.bytecodeReqFails <- req: 1379 // Sync event loop notified 1380 case <-req.cancel: 1381 // Sync cycle got cancelled 1382 case <-req.stale: 1383 // Request already reverted 1384 } 1385 } 1386 1387 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1388 // retrieval tasks to the scheduler for reassignment. 1389 // 1390 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1391 // On peer threads, use scheduleRevertBytecodeRequest. 1392 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1393 log.Debug("Reverting bytecode request", "peer", req.peer) 1394 select { 1395 case <-req.stale: 1396 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1397 return 1398 default: 1399 } 1400 close(req.stale) 1401 1402 // Remove the request from the tracked set 1403 s.lock.Lock() 1404 delete(s.bytecodeReqs, req.id) 1405 s.lock.Unlock() 1406 1407 // If there's a timeout timer still running, abort it and mark the code 1408 // retrievals as not-pending, ready for resheduling 1409 req.timeout.Stop() 1410 for _, hash := range req.hashes { 1411 req.task.codeTasks[hash] = struct{}{} 1412 } 1413 } 1414 1415 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1416 // request and return all failed retrieval tasks to the scheduler for reassignment. 1417 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1418 select { 1419 case s.storageReqFails <- req: 1420 // Sync event loop notified 1421 case <-req.cancel: 1422 // Sync cycle got cancelled 1423 case <-req.stale: 1424 // Request already reverted 1425 } 1426 } 1427 1428 // revertStorageRequest cleans up a storage range request and returns all failed 1429 // retrieval tasks to the scheduler for reassignment. 1430 // 1431 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1432 // On peer threads, use scheduleRevertStorageRequest. 1433 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1434 log.Debug("Reverting storage request", "peer", req.peer) 1435 select { 1436 case <-req.stale: 1437 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1438 return 1439 default: 1440 } 1441 close(req.stale) 1442 1443 // Remove the request from the tracked set 1444 s.lock.Lock() 1445 delete(s.storageReqs, req.id) 1446 s.lock.Unlock() 1447 1448 // If there's a timeout timer still running, abort it and mark the storage 1449 // task as not-pending, ready for resheduling 1450 req.timeout.Stop() 1451 if req.subTask != nil { 1452 req.subTask.req = nil 1453 } else { 1454 for i, account := range req.accounts { 1455 req.mainTask.stateTasks[account] = req.roots[i] 1456 } 1457 } 1458 } 1459 1460 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1461 // request and return all failed retrieval tasks to the scheduler for reassignment. 1462 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1463 select { 1464 case s.trienodeHealReqFails <- req: 1465 // Sync event loop notified 1466 case <-req.cancel: 1467 // Sync cycle got cancelled 1468 case <-req.stale: 1469 // Request already reverted 1470 } 1471 } 1472 1473 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1474 // failed retrieval tasks to the scheduler for reassignment. 
1475 // 1476 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1477 // On peer threads, use scheduleRevertTrienodeHealRequest. 1478 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { 1479 log.Debug("Reverting trienode heal request", "peer", req.peer) 1480 select { 1481 case <-req.stale: 1482 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) 1483 return 1484 default: 1485 } 1486 close(req.stale) 1487 1488 // Remove the request from the tracked set 1489 s.lock.Lock() 1490 delete(s.trienodeHealReqs, req.id) 1491 s.lock.Unlock() 1492 1493 // If there's a timeout timer still running, abort it and mark the trie node 1494 // retrievals as not-pending, ready for resheduling 1495 req.timeout.Stop() 1496 for i, hash := range req.hashes { 1497 req.task.trieTasks[hash] = req.paths[i] 1498 } 1499 } 1500 1501 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal 1502 // request and return all failed retrieval tasks to the scheduler for reassignment. 1503 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { 1504 select { 1505 case s.bytecodeHealReqFails <- req: 1506 // Sync event loop notified 1507 case <-req.cancel: 1508 // Sync cycle got cancelled 1509 case <-req.stale: 1510 // Request already reverted 1511 } 1512 } 1513 1514 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all 1515 // failed retrieval tasks to the scheduler for reassignment. 1516 // 1517 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1518 // On peer threads, use scheduleRevertBytecodeHealRequest. 1519 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { 1520 log.Debug("Reverting bytecode heal request", "peer", req.peer) 1521 select { 1522 case <-req.stale: 1523 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) 1524 return 1525 default: 1526 } 1527 close(req.stale) 1528 1529 // Remove the request from the tracked set 1530 s.lock.Lock() 1531 delete(s.bytecodeHealReqs, req.id) 1532 s.lock.Unlock() 1533 1534 // If there's a timeout timer still running, abort it and mark the code 1535 // retrievals as not-pending, ready for resheduling 1536 req.timeout.Stop() 1537 for _, hash := range req.hashes { 1538 req.task.codeTasks[hash] = struct{}{} 1539 } 1540 } 1541 1542 // processAccountResponse integrates an already validated account range response 1543 // into the account tasks. 1544 func (s *Syncer) processAccountResponse(res *accountResponse) { 1545 // Switch the task from pending to filling 1546 res.task.req = nil 1547 res.task.res = res 1548 1549 // Ensure that the response doesn't overflow into the subsequent task 1550 last := res.task.Last.Big() 1551 for i, hash := range res.hashes { 1552 if hash.Big().Cmp(last) > 0 { 1553 // Chunk overflown, cut off excess, but also update the boundary nodes 1554 for j := i; j < len(res.hashes); j++ { 1555 if err := res.trie.Prove(res.hashes[j][:], 0, res.overflow); err != nil { 1556 panic(err) // Account range was already proven, what happened 1557 } 1558 } 1559 res.hashes = res.hashes[:i] 1560 res.accounts = res.accounts[:i] 1561 res.cont = false // Mark range completed 1562 break 1563 } 1564 } 1565 // Iterate over all the accounts and assemble which ones need further sub- 1566 // filling before the entire account range can be persisted. 
1567 res.task.needCode = make([]bool, len(res.accounts)) 1568 res.task.needState = make([]bool, len(res.accounts)) 1569 res.task.needHeal = make([]bool, len(res.accounts)) 1570 1571 res.task.codeTasks = make(map[common.Hash]struct{}) 1572 res.task.stateTasks = make(map[common.Hash]common.Hash) 1573 1574 resumed := make(map[common.Hash]struct{}) 1575 1576 res.task.pend = 0 1577 for i, account := range res.accounts { 1578 // Check if the account is a contract with an unknown code 1579 if !bytes.Equal(account.CodeHash, emptyCode[:]) { 1580 if code := rawdb.ReadCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)); code == nil { 1581 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} 1582 res.task.needCode[i] = true 1583 res.task.pend++ 1584 } 1585 } 1586 // Check if the account is a contract with an unknown storage trie 1587 if account.Root != emptyRoot { 1588 if node, err := s.db.Get(account.Root[:]); err != nil || node == nil { 1589 // If there was a previous large state retrieval in progress, 1590 // don't restart it from scratch. This happens if a sync cycle 1591 // is interrupted and resumed later. However, *do* update the 1592 // previous root hash. 1593 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok { 1594 log.Error("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root) 1595 for _, subtask := range subtasks { 1596 subtask.root = account.Root 1597 } 1598 res.task.needHeal[i] = true 1599 resumed[res.hashes[i]] = struct{}{} 1600 } else { 1601 res.task.stateTasks[res.hashes[i]] = account.Root 1602 } 1603 res.task.needState[i] = true 1604 res.task.pend++ 1605 } 1606 } 1607 } 1608 // Delete any subtasks that have been aborted but not resumed. This may undo 1609 // some progress if a new peer gives us fewer accounts than an old one, but for 1610 // now we have to live with that. 1611 for hash := range res.task.SubTasks { 1612 if _, ok := resumed[hash]; !ok { 1613 log.Error("Aborting suspended storage retrieval", "account", hash) 1614 delete(res.task.SubTasks, hash) 1615 } 1616 } 1617 // If the account range contained no contracts, or all have been fully filled 1618 // beforehand, short circuit storage filling and forward to the next task 1619 if res.task.pend == 0 { 1620 s.forwardAccountTask(res.task) 1621 return 1622 } 1623 // Some accounts are incomplete, leave as is for the storage and contract 1624 // task assigners to pick up and fill. 1625 } 1626 1627 // processBytecodeResponse integrates an already validated bytecode response 1628 // into the account tasks. 
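// Hashes that were not delivered are pushed back into the task's codeTasks set so
// that a later assignment can retry them.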
1629 func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { 1630 batch := s.db.NewBatch() 1631 1632 var ( 1633 codes uint64 1634 bytes common.StorageSize 1635 ) 1636 for i, hash := range res.hashes { 1637 code := res.codes[i] 1638 1639 // If the bytecode was not delivered, reschedule it 1640 if code == nil { 1641 res.task.codeTasks[hash] = struct{}{} 1642 continue 1643 } 1644 // Code was delivered, mark it not needed any more 1645 for j, account := range res.task.res.accounts { 1646 if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) { 1647 res.task.needCode[j] = false 1648 res.task.pend-- 1649 } 1650 } 1651 // Push the bytecode into a database batch 1652 s.bytecodeSynced++ 1653 s.bytecodeBytes += common.StorageSize(len(code)) 1654 1655 codes++ 1656 bytes += common.StorageSize(len(code)) 1657 1658 rawdb.WriteCode(batch, hash, code) 1659 s.bloom.Add(hash[:]) 1660 } 1661 if err := batch.Write(); err != nil { 1662 log.Crit("Failed to persist bytecodes", "err", err) 1663 } 1664 log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes) 1665 1666 // If this delivery completed the last pending task, forward the account task 1667 // to the next chunk 1668 if res.task.pend == 0 { 1669 s.forwardAccountTask(res.task) 1670 return 1671 } 1672 // Some accounts are still incomplete, leave as is for the storage and contract 1673 // task assigners to pick up and fill. 1674 } 1675 1676 // processStorageResponse integrates an already validated storage response 1677 // into the account tasks. 1678 func (s *Syncer) processStorageResponse(res *storageResponse) { 1679 // Switch the subtask from pending to idle 1680 if res.subTask != nil { 1681 res.subTask.req = nil 1682 } 1683 batch := s.db.NewBatch() 1684 1685 var ( 1686 slots int 1687 nodes int 1688 skipped int 1689 bytes common.StorageSize 1690 ) 1691 // Iterate over all the accounts and reconstruct their storage tries from the 1692 // delivered slots 1693 for i, account := range res.accounts { 1694 // If the account was not delivered, reschedule it 1695 if i >= len(res.hashes) { 1696 res.mainTask.stateTasks[account] = res.roots[i] 1697 continue 1698 } 1699 // State was delivered, if complete mark as not needed any more, otherwise 1700 // mark the account as needing healing 1701 for j, hash := range res.mainTask.res.hashes { 1702 if account != hash { 1703 continue 1704 } 1705 acc := res.mainTask.res.accounts[j] 1706 1707 // If the packet contains multiple contract storage slots, all 1708 // but the last are surely complete. The last contract may be 1709 // chunked, so check its continuation flag. 1710 if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { 1711 res.mainTask.needState[j] = false 1712 res.mainTask.pend-- 1713 } 1714 // If the last contract was chunked, mark it as needing healing 1715 // to avoid writing it out to disk prematurely. 
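// (The healing phase re-downloads any missing boundary trie nodes afterwards, so
// deferring the write of a chunked account here is safe.)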
1716 if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { 1717 res.mainTask.needHeal[j] = true 1718 } 1719 // If the last contract was chunked, we need to switch to large 1720 // contract handling mode 1721 if res.subTask == nil && i == len(res.hashes)-1 && res.cont { 1722 // If we haven't yet started a large-contract retrieval, create 1723 // the subtasks for it within the main account task 1724 if tasks, ok := res.mainTask.SubTasks[account]; !ok { 1725 var ( 1726 next common.Hash 1727 ) 1728 step := new(big.Int).Sub( 1729 new(big.Int).Div( 1730 new(big.Int).Exp(common.Big2, common.Big256, nil), 1731 big.NewInt(storageConcurrency), 1732 ), common.Big1, 1733 ) 1734 for k := 0; k < storageConcurrency; k++ { 1735 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 1736 if k == storageConcurrency-1 { 1737 // Make sure we don't overflow if the step is not a proper divisor 1738 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 1739 } 1740 tasks = append(tasks, &storageTask{ 1741 Next: next, 1742 Last: last, 1743 root: acc.Root, 1744 }) 1745 log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last) 1746 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 1747 } 1748 res.mainTask.SubTasks[account] = tasks 1749 1750 // Since we've just created the sub-tasks, this response 1751 // is surely for the first one (zero origin) 1752 res.subTask = tasks[0] 1753 } 1754 } 1755 // If we're in large contract delivery mode, forward the subtask 1756 if res.subTask != nil { 1757 // Ensure the response doesn't overflow into the subsequent task 1758 last := res.subTask.Last.Big() 1759 for k, hash := range res.hashes[i] { 1760 if hash.Big().Cmp(last) > 0 { 1761 // Chunk overflown, cut off excess, but also update the boundary 1762 for l := k; l < len(res.hashes[i]); l++ { 1763 if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil { 1764 panic(err) // Account range was already proven, what happened 1765 } 1766 } 1767 res.hashes[i] = res.hashes[i][:k] 1768 res.slots[i] = res.slots[i][:k] 1769 res.cont = false // Mark range completed 1770 break 1771 } 1772 } 1773 // Forward the relevant storage chunk (even if created just now) 1774 if res.cont { 1775 res.subTask.Next = common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1))) 1776 } else { 1777 res.subTask.done = true 1778 } 1779 } 1780 } 1781 // Iterate over all the reconstructed trie nodes and push them to disk 1782 slots += len(res.hashes[i]) 1783 1784 it := res.nodes[i].NewIterator(nil, nil) 1785 for it.Next() { 1786 // Boundary nodes are not written for the last result, since they are incomplete 1787 if i == len(res.hashes)-1 { 1788 if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok { 1789 skipped++ 1790 continue 1791 } 1792 } 1793 // Node is not a boundary, persist to disk 1794 batch.Put(it.Key(), it.Value()) 1795 s.bloom.Add(it.Key()) 1796 1797 bytes += common.StorageSize(common.HashLength + len(it.Value())) 1798 nodes++ 1799 } 1800 it.Release() 1801 } 1802 if err := batch.Write(); err != nil { 1803 log.Crit("Failed to persist storage slots", "err", err) 1804 } 1805 s.storageSynced += uint64(slots) 1806 s.storageBytes += bytes 1807 1808 log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "nodes", nodes, "skipped", skipped, "bytes", bytes) 1809 1810 // If this delivery completed the last pending task, forward 
the account task 1811 // to the next chunk 1812 if res.mainTask.pend == 0 { 1813 s.forwardAccountTask(res.mainTask) 1814 return 1815 } 1816 // Some accounts are still incomplete, leave as is for the storage and contract 1817 // task assigners to pick up and fill. 1818 } 1819 1820 // processTrienodeHealResponse integrates an already validated trienode response 1821 // into the healer tasks. 1822 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { 1823 for i, hash := range res.hashes { 1824 node := res.nodes[i] 1825 1826 // If the trie node was not delivered, reschedule it 1827 if node == nil { 1828 res.task.trieTasks[hash] = res.paths[i] 1829 continue 1830 } 1831 // Push the trie node into the state syncer 1832 s.trienodeHealSynced++ 1833 s.trienodeHealBytes += common.StorageSize(len(node)) 1834 1835 err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) 1836 switch err { 1837 case nil: 1838 case trie.ErrAlreadyProcessed: 1839 s.trienodeHealDups++ 1840 case trie.ErrNotRequested: 1841 s.trienodeHealNops++ 1842 default: 1843 log.Error("Invalid trienode processed", "hash", hash, "err", err) 1844 } 1845 } 1846 batch := s.db.NewBatch() 1847 if err := s.healer.scheduler.Commit(batch); err != nil { 1848 log.Error("Failed to commit healing data", "err", err) 1849 } 1850 if err := batch.Write(); err != nil { 1851 log.Crit("Failed to persist healing data", "err", err) 1852 } 1853 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) 1854 } 1855 1856 // processBytecodeHealResponse integrates an already validated bytecode response 1857 // into the healer tasks. 1858 func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { 1859 for i, hash := range res.hashes { 1860 node := res.codes[i] 1861 1862 // If the bytecode was not delivered, reschedule it 1863 if node == nil { 1864 res.task.codeTasks[hash] = struct{}{} 1865 continue 1866 } 1867 // Push the bytecode into the state syncer 1868 s.bytecodeHealSynced++ 1869 s.bytecodeHealBytes += common.StorageSize(len(node)) 1870 1871 err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) 1872 switch err { 1873 case nil: 1874 case trie.ErrAlreadyProcessed: 1875 s.bytecodeHealDups++ 1876 case trie.ErrNotRequested: 1877 s.bytecodeHealNops++ 1878 default: 1879 log.Error("Invalid bytecode processed", "hash", hash, "err", err) 1880 } 1881 } 1882 batch := s.db.NewBatch() 1883 if err := s.healer.scheduler.Commit(batch); err != nil { 1884 log.Error("Failed to commit healing data", "err", err) 1885 } 1886 if err := batch.Write(); err != nil { 1887 log.Crit("Failed to persist healing data", "err", err) 1888 } 1889 log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize())) 1890 } 1891 1892 // forwardAccountTask takes a filled account task and persists anything available 1893 // into the database, after which it forwards the next account marker so that the 1894 // task's next chunk may be filled. 1895 func (s *Syncer) forwardAccountTask(task *accountTask) { 1896 // Remove any pending delivery 1897 res := task.res 1898 if res == nil { 1899 return // nothing to forward 1900 } 1901 task.res = nil 1902 1903 // Iterate over all the accounts and gather all the incomplete trie nodes. A 1904 // node is incomplete if we haven't yet filled it (sync was interrupted), or 1905 // if we filled it in multiple chunks (storage trie), in which case the few 1906 // nodes on the chunk boundaries are missing. 
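// The Merkle path of every such incomplete account is collected into the 'incompletes'
// node set below; any trie node appearing in it is skipped when the batch is written.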
1907 incompletes := light.NewNodeSet() 1908 for i := range res.accounts { 1909 // If the filling was interrupted, mark everything after as incomplete 1910 if task.needCode[i] || task.needState[i] { 1911 for j := i; j < len(res.accounts); j++ { 1912 if err := res.trie.Prove(res.hashes[j][:], 0, incompletes); err != nil { 1913 panic(err) // Account range was already proven, what happened 1914 } 1915 } 1916 break 1917 } 1918 // Filling not interrupted until this point, mark incomplete if needs healing 1919 if task.needHeal[i] { 1920 if err := res.trie.Prove(res.hashes[i][:], 0, incompletes); err != nil { 1921 panic(err) // Account range was already proven, what happened 1922 } 1923 } 1924 } 1925 // Persist every finalized trie node that's not on the boundary 1926 batch := s.db.NewBatch() 1927 1928 var ( 1929 nodes int 1930 skipped int 1931 bytes common.StorageSize 1932 ) 1933 it := res.nodes.NewIterator(nil, nil) 1934 for it.Next() { 1935 // Boundary nodes are not written, since they are incomplete 1936 if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok { 1937 skipped++ 1938 continue 1939 } 1940 // Overflow nodes are not written, since they mess with another task 1941 if _, err := res.overflow.Get(it.Key()); err == nil { 1942 skipped++ 1943 continue 1944 } 1945 // Accounts with split storage requests are incomplete 1946 if _, err := incompletes.Get(it.Key()); err == nil { 1947 skipped++ 1948 continue 1949 } 1950 // Node is neither a boundary nor an incomplete account, persist to disk 1951 batch.Put(it.Key(), it.Value()) 1952 s.bloom.Add(it.Key()) 1953 1954 bytes += common.StorageSize(common.HashLength + len(it.Value())) 1955 nodes++ 1956 } 1957 it.Release() 1958 1959 if err := batch.Write(); err != nil { 1960 log.Crit("Failed to persist accounts", "err", err) 1961 } 1962 s.accountBytes += bytes 1963 s.accountSynced += uint64(len(res.accounts)) 1964 1965 log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "nodes", nodes, "skipped", skipped, "bytes", bytes) 1966 1967 // Task filling persisted, push the chunk marker forward to the first 1968 // account still missing data. 1969 for i, hash := range res.hashes { 1970 if task.needCode[i] || task.needState[i] { 1971 return 1972 } 1973 task.Next = common.BigToHash(new(big.Int).Add(hash.Big(), big.NewInt(1))) 1974 } 1975 // All accounts marked as complete, track if the entire task is done 1976 task.done = !res.cont 1977 } 1978 1979 // OnAccounts is a callback method to invoke when a range of accounts is 1980 // received from a remote peer. 1981 func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { 1982 size := common.StorageSize(len(hashes) * common.HashLength) 1983 for _, account := range accounts { 1984 size += common.StorageSize(len(account)) 1985 } 1986 for _, node := range proof { 1987 size += common.StorageSize(len(node)) 1988 } 1989 logger := peer.Log().New("reqid", id) 1990 logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) 1991 1992 // Whether or not the response is valid, we can mark the peer as idle and 1993 // notify the scheduler to assign a new task. If the response is invalid, 1994 // we'll drop the peer in a bit. 
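// Note that the peer is only re-added to the idle set if it is still registered; a
// peer dropped concurrently must not be resurrected here.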
1995 s.lock.Lock() 1996 if _, ok := s.peers[peer.ID()]; ok { 1997 s.accountIdlers[peer.ID()] = struct{}{} 1998 } 1999 select { 2000 case s.update <- struct{}{}: 2001 default: 2002 } 2003 // Ensure the response is for a valid request 2004 req, ok := s.accountReqs[id] 2005 if !ok { 2006 // Request stale, perhaps the peer timed out but came through in the end 2007 logger.Warn("Unexpected account range packet") 2008 s.lock.Unlock() 2009 return nil 2010 } 2011 delete(s.accountReqs, id) 2012 2013 // Clean up the request timeout timer, we'll see how to proceed further based 2014 // on the actual delivered content 2015 if !req.timeout.Stop() { 2016 // The timeout is already triggered, and this request will be reverted+rescheduled 2017 s.lock.Unlock() 2018 return nil 2019 } 2020 2021 // Response is valid, but check if peer is signalling that it does not have 2022 // the requested data. For account range queries that means the state being 2023 // retrieved was either already pruned remotely, or the peer is not yet 2024 // synced to our head. 2025 if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { 2026 logger.Debug("Peer rejected account range request", "root", s.root) 2027 s.statelessPeers[peer.ID()] = struct{}{} 2028 s.lock.Unlock() 2029 2030 // Signal this request as failed, and ready for rescheduling 2031 s.scheduleRevertAccountRequest(req) 2032 return nil 2033 } 2034 root := s.root 2035 s.lock.Unlock() 2036 2037 // Reconstruct a partial trie from the response and verify it 2038 keys := make([][]byte, len(hashes)) 2039 for i, key := range hashes { 2040 keys[i] = common.CopyBytes(key[:]) 2041 } 2042 nodes := make(light.NodeList, len(proof)) 2043 for i, node := range proof { 2044 nodes[i] = node 2045 } 2046 proofdb := nodes.NodeSet() 2047 2048 var end []byte 2049 if len(keys) > 0 { 2050 end = keys[len(keys)-1] 2051 } 2052 db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb) 2053 if err != nil { 2054 logger.Warn("Account range failed proof", "err", err) 2055 // Signal this request as failed, and ready for rescheduling 2056 s.scheduleRevertAccountRequest(req) 2057 return err 2058 } 2059 // Partial trie reconstructed, send it to the scheduler for storage filling 2060 bounds := make(map[common.Hash]struct{}) 2061 2062 it := notary.Accessed().NewIterator(nil, nil) 2063 for it.Next() { 2064 bounds[common.BytesToHash(it.Key())] = struct{}{} 2065 } 2066 it.Release() 2067 2068 accs := make([]*state.Account, len(accounts)) 2069 for i, account := range accounts { 2070 acc := new(state.Account) 2071 if err := rlp.DecodeBytes(account, acc); err != nil { 2072 panic(err) // We created these blobs, we must be able to decode them 2073 } 2074 accs[i] = acc 2075 } 2076 response := &accountResponse{ 2077 task: req.task, 2078 hashes: hashes, 2079 accounts: accs, 2080 nodes: db, 2081 trie: tr, 2082 bounds: bounds, 2083 overflow: light.NewNodeSet(), 2084 cont: cont, 2085 } 2086 select { 2087 case s.accountResps <- response: 2088 case <-req.cancel: 2089 case <-req.stale: 2090 } 2091 return nil 2092 } 2093 2094 // OnByteCodes is a callback method to invoke when a batch of contract 2095 // bytecodes is received from a remote peer. 
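// Depending on whether the syncer is still in the bulk sync phase or already healing,
// the delivery is routed to onByteCodes or onHealByteCodes respectively.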
2096 func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2097 s.lock.RLock() 2098 syncing := !s.snapped 2099 s.lock.RUnlock() 2100 2101 if syncing { 2102 return s.onByteCodes(peer, id, bytecodes) 2103 } 2104 return s.onHealByteCodes(peer, id, bytecodes) 2105 } 2106 2107 // onByteCodes is a callback method to invoke when a batch of contract 2108 // bytecodes is received from a remote peer in the syncing phase. 2109 func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2110 var size common.StorageSize 2111 for _, code := range bytecodes { 2112 size += common.StorageSize(len(code)) 2113 } 2114 logger := peer.Log().New("reqid", id) 2115 logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2116 2117 // Whether or not the response is valid, we can mark the peer as idle and 2118 // notify the scheduler to assign a new task. If the response is invalid, 2119 // we'll drop the peer in a bit. 2120 s.lock.Lock() 2121 if _, ok := s.peers[peer.ID()]; ok { 2122 s.bytecodeIdlers[peer.ID()] = struct{}{} 2123 } 2124 select { 2125 case s.update <- struct{}{}: 2126 default: 2127 } 2128 // Ensure the response is for a valid request 2129 req, ok := s.bytecodeReqs[id] 2130 if !ok { 2131 // Request stale, perhaps the peer timed out but came through in the end 2132 logger.Warn("Unexpected bytecode packet") 2133 s.lock.Unlock() 2134 return nil 2135 } 2136 delete(s.bytecodeReqs, id) 2137 2138 // Clean up the request timeout timer, we'll see how to proceed further based 2139 // on the actual delivered content 2140 if !req.timeout.Stop() { 2141 // The timeout is already triggered, and this request will be reverted+rescheduled 2142 s.lock.Unlock() 2143 return nil 2144 } 2145 2146 // Response is valid, but check if peer is signalling that it does not have 2147 // the requested data. For bytecode range queries that means the peer is not 2148 // yet synced. 
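// An empty reply is not treated as a protocol violation: the peer is merely flagged as
// stateless for this cycle and the request is handed back to the scheduler.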
2149 if len(bytecodes) == 0 { 2150 logger.Debug("Peer rejected bytecode request") 2151 s.statelessPeers[peer.ID()] = struct{}{} 2152 s.lock.Unlock() 2153 2154 // Signal this request as failed, and ready for rescheduling 2155 s.scheduleRevertBytecodeRequest(req) 2156 return nil 2157 } 2158 s.lock.Unlock() 2159 2160 // Cross reference the requested bytecodes with the response to find gaps 2161 // that the serving node is missing 2162 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2163 hash := make([]byte, 32) 2164 2165 codes := make([][]byte, len(req.hashes)) 2166 for i, j := 0, 0; i < len(bytecodes); i++ { 2167 // Find the next hash that we've been served, leaving misses with nils 2168 hasher.Reset() 2169 hasher.Write(bytecodes[i]) 2170 hasher.Read(hash) 2171 2172 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2173 j++ 2174 } 2175 if j < len(req.hashes) { 2176 codes[j] = bytecodes[i] 2177 j++ 2178 continue 2179 } 2180 // We've either run out of hashes, or got unrequested data 2181 logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i) 2182 // Signal this request as failed, and ready for rescheduling 2183 s.scheduleRevertBytecodeRequest(req) 2184 return errors.New("unexpected bytecode") 2185 } 2186 // Response validated, send it to the scheduler for filling 2187 response := &bytecodeResponse{ 2188 task: req.task, 2189 hashes: req.hashes, 2190 codes: codes, 2191 } 2192 select { 2193 case s.bytecodeResps <- response: 2194 case <-req.cancel: 2195 case <-req.stale: 2196 } 2197 return nil 2198 } 2199 2200 // OnStorage is a callback method to invoke when ranges of storage slots 2201 // are received from a remote peer. 2202 func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { 2203 // Gather some trace stats to aid in debugging issues 2204 var ( 2205 hashCount int 2206 slotCount int 2207 size common.StorageSize 2208 ) 2209 for _, hashset := range hashes { 2210 size += common.StorageSize(common.HashLength * len(hashset)) 2211 hashCount += len(hashset) 2212 } 2213 for _, slotset := range slots { 2214 for _, slot := range slotset { 2215 size += common.StorageSize(len(slot)) 2216 } 2217 slotCount += len(slotset) 2218 } 2219 for _, node := range proof { 2220 size += common.StorageSize(len(node)) 2221 } 2222 logger := peer.Log().New("reqid", id) 2223 logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) 2224 2225 // Whether or not the response is valid, we can mark the peer as idle and 2226 // notify the scheduler to assign a new task. If the response is invalid, 2227 // we'll drop the peer in a bit. 
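// As with account deliveries, idleness is restored before validation so the scheduler
// can assign this peer new work even if the current response is later rejected.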
2228 s.lock.Lock() 2229 if _, ok := s.peers[peer.ID()]; ok { 2230 s.storageIdlers[peer.ID()] = struct{}{} 2231 } 2232 select { 2233 case s.update <- struct{}{}: 2234 default: 2235 } 2236 // Ensure the response is for a valid request 2237 req, ok := s.storageReqs[id] 2238 if !ok { 2239 // Request stale, perhaps the peer timed out but came through in the end 2240 logger.Warn("Unexpected storage ranges packet") 2241 s.lock.Unlock() 2242 return nil 2243 } 2244 delete(s.storageReqs, id) 2245 2246 // Clean up the request timeout timer, we'll see how to proceed further based 2247 // on the actual delivered content 2248 if !req.timeout.Stop() { 2249 // The timeout is already triggered, and this request will be reverted+rescheduled 2250 s.lock.Unlock() 2251 return nil 2252 } 2253 2254 // Reject the response if the hash sets and slot sets don't match, or if the 2255 // peer sent more data than requested. 2256 if len(hashes) != len(slots) { 2257 s.lock.Unlock() 2258 s.scheduleRevertStorageRequest(req) // reschedule request 2259 logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) 2260 return errors.New("hash and slot set size mismatch") 2261 } 2262 if len(hashes) > len(req.accounts) { 2263 s.lock.Unlock() 2264 s.scheduleRevertStorageRequest(req) // reschedule request 2265 logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) 2266 return errors.New("hash set larger than requested") 2267 } 2268 // Response is valid, but check if peer is signalling that it does not have 2269 // the requested data. For storage range queries that means the state being 2270 // retrieved was either already pruned remotely, or the peer is not yet 2271 // synced to our head. 2272 if len(hashes) == 0 { 2273 logger.Debug("Peer rejected storage request") 2274 s.statelessPeers[peer.ID()] = struct{}{} 2275 s.lock.Unlock() 2276 s.scheduleRevertStorageRequest(req) // reschedule request 2277 return nil 2278 } 2279 s.lock.Unlock() 2280 2281 // Reconstruct the partial tries from the response and verify them 2282 var ( 2283 dbs = make([]ethdb.KeyValueStore, len(hashes)) 2284 tries = make([]*trie.Trie, len(hashes)) 2285 notary *trie.KeyValueNotary 2286 cont bool 2287 ) 2288 for i := 0; i < len(hashes); i++ { 2289 // Convert the keys and proofs into an internal format 2290 keys := make([][]byte, len(hashes[i])) 2291 for j, key := range hashes[i] { 2292 keys[j] = common.CopyBytes(key[:]) 2293 } 2294 nodes := make(light.NodeList, 0, len(proof)) 2295 if i == len(hashes)-1 { 2296 for _, node := range proof { 2297 nodes = append(nodes, node) 2298 } 2299 } 2300 var err error 2301 if len(nodes) == 0 { 2302 // No proof has been attached, the response must cover the entire key 2303 // space and hash to the origin root. 
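// (With no proof attached, VerifyRangeProof reconstructs the trie from the delivered
// slots alone and checks that it hashes to the expected root.)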
2304 dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) 2305 if err != nil { 2306 s.scheduleRevertStorageRequest(req) // reschedule request 2307 logger.Warn("Storage slots failed proof", "err", err) 2308 return err 2309 } 2310 } else { 2311 // A proof was attached, the response is only partial, check that the 2312 // returned data is indeed part of the storage trie 2313 proofdb := nodes.NodeSet() 2314 2315 var end []byte 2316 if len(keys) > 0 { 2317 end = keys[len(keys)-1] 2318 } 2319 dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) 2320 if err != nil { 2321 s.scheduleRevertStorageRequest(req) // reschedule request 2322 logger.Warn("Storage range failed proof", "err", err) 2323 return err 2324 } 2325 } 2326 } 2327 // Partial tries reconstructed, send them to the scheduler for storage filling 2328 bounds := make(map[common.Hash]struct{}) 2329 2330 if notary != nil { // if all contract storages are delivered in full, no notary will be created 2331 it := notary.Accessed().NewIterator(nil, nil) 2332 for it.Next() { 2333 bounds[common.BytesToHash(it.Key())] = struct{}{} 2334 } 2335 it.Release() 2336 } 2337 response := &storageResponse{ 2338 mainTask: req.mainTask, 2339 subTask: req.subTask, 2340 accounts: req.accounts, 2341 roots: req.roots, 2342 hashes: hashes, 2343 slots: slots, 2344 nodes: dbs, 2345 tries: tries, 2346 bounds: bounds, 2347 overflow: light.NewNodeSet(), 2348 cont: cont, 2349 } 2350 select { 2351 case s.storageResps <- response: 2352 case <-req.cancel: 2353 case <-req.stale: 2354 } 2355 return nil 2356 } 2357 2358 // OnTrieNodes is a callback method to invoke when a batch of trie nodes 2359 // is received from a remote peer. 2360 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2361 var size common.StorageSize 2362 for _, node := range trienodes { 2363 size += common.StorageSize(len(node)) 2364 } 2365 logger := peer.Log().New("reqid", id) 2366 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2367 2368 // Whether or not the response is valid, we can mark the peer as idle and 2369 // notify the scheduler to assign a new task. If the response is invalid, 2370 // we'll drop the peer in a bit. 2371 s.lock.Lock() 2372 if _, ok := s.peers[peer.ID()]; ok { 2373 s.trienodeHealIdlers[peer.ID()] = struct{}{} 2374 } 2375 select { 2376 case s.update <- struct{}{}: 2377 default: 2378 } 2379 // Ensure the response is for a valid request 2380 req, ok := s.trienodeHealReqs[id] 2381 if !ok { 2382 // Request stale, perhaps the peer timed out but came through in the end 2383 logger.Warn("Unexpected trienode heal packet") 2384 s.lock.Unlock() 2385 return nil 2386 } 2387 delete(s.trienodeHealReqs, id) 2388 2389 // Clean up the request timeout timer, we'll see how to proceed further based 2390 // on the actual delivered content 2391 if !req.timeout.Stop() { 2392 // The timeout is already triggered, and this request will be reverted+rescheduled 2393 s.lock.Unlock() 2394 return nil 2395 } 2396 2397 // Response is valid, but check if peer is signalling that it does not have 2398 // the requested data. For trie node heal queries that means the peer is not 2399 // yet synced. 
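// Heal requests are keyed by exact node hashes, so any returned blob can be matched
// against the request below simply by re-hashing it.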
2400 if len(trienodes) == 0 { 2401 logger.Debug("Peer rejected trienode heal request") 2402 s.statelessPeers[peer.ID()] = struct{}{} 2403 s.lock.Unlock() 2404 2405 // Signal this request as failed, and ready for rescheduling 2406 s.scheduleRevertTrienodeHealRequest(req) 2407 return nil 2408 } 2409 s.lock.Unlock() 2410 2411 // Cross reference the requested trienodes with the response to find gaps 2412 // that the serving node is missing 2413 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2414 hash := make([]byte, 32) 2415 2416 nodes := make([][]byte, len(req.hashes)) 2417 for i, j := 0, 0; i < len(trienodes); i++ { 2418 // Find the next hash that we've been served, leaving misses with nils 2419 hasher.Reset() 2420 hasher.Write(trienodes[i]) 2421 hasher.Read(hash) 2422 2423 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2424 j++ 2425 } 2426 if j < len(req.hashes) { 2427 nodes[j] = trienodes[i] 2428 j++ 2429 continue 2430 } 2431 // We've either run out of hashes, or got unrequested data 2432 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) 2433 // Signal this request as failed, and ready for rescheduling 2434 s.scheduleRevertTrienodeHealRequest(req) 2435 return errors.New("unexpected healing trienode") 2436 } 2437 // Response validated, send it to the scheduler for filling 2438 response := &trienodeHealResponse{ 2439 task: req.task, 2440 hashes: req.hashes, 2441 paths: req.paths, 2442 nodes: nodes, 2443 } 2444 select { 2445 case s.trienodeHealResps <- response: 2446 case <-req.cancel: 2447 case <-req.stale: 2448 } 2449 return nil 2450 } 2451 2452 // onHealByteCodes is a callback method to invoke when a batch of contract 2453 // bytecodes is received from a remote peer in the healing phase. 2454 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2455 var size common.StorageSize 2456 for _, code := range bytecodes { 2457 size += common.StorageSize(len(code)) 2458 } 2459 logger := peer.Log().New("reqid", id) 2460 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2461 2462 // Whether or not the response is valid, we can mark the peer as idle and 2463 // notify the scheduler to assign a new task. If the response is invalid, 2464 // we'll drop the peer in a bit. 2465 s.lock.Lock() 2466 if _, ok := s.peers[peer.ID()]; ok { 2467 s.bytecodeHealIdlers[peer.ID()] = struct{}{} 2468 } 2469 select { 2470 case s.update <- struct{}{}: 2471 default: 2472 } 2473 // Ensure the response is for a valid request 2474 req, ok := s.bytecodeHealReqs[id] 2475 if !ok { 2476 // Request stale, perhaps the peer timed out but came through in the end 2477 logger.Warn("Unexpected bytecode heal packet") 2478 s.lock.Unlock() 2479 return nil 2480 } 2481 delete(s.bytecodeHealReqs, id) 2482 2483 // Clean up the request timeout timer, we'll see how to proceed further based 2484 // on the actual delivered content 2485 if !req.timeout.Stop() { 2486 // The timeout is already triggered, and this request will be reverted+rescheduled 2487 s.lock.Unlock() 2488 return nil 2489 } 2490 2491 // Response is valid, but check if peer is signalling that it does not have 2492 // the requested data. For bytecode range queries that means the peer is not 2493 // yet synced. 
2494 if len(bytecodes) == 0 { 2495 logger.Debug("Peer rejected bytecode heal request") 2496 s.statelessPeers[peer.ID()] = struct{}{} 2497 s.lock.Unlock() 2498 2499 // Signal this request as failed, and ready for rescheduling 2500 s.scheduleRevertBytecodeHealRequest(req) 2501 return nil 2502 } 2503 s.lock.Unlock() 2504 2505 // Cross reference the requested bytecodes with the response to find gaps 2506 // that the serving node is missing 2507 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2508 hash := make([]byte, 32) 2509 2510 codes := make([][]byte, len(req.hashes)) 2511 for i, j := 0, 0; i < len(bytecodes); i++ { 2512 // Find the next hash that we've been served, leaving misses with nils 2513 hasher.Reset() 2514 hasher.Write(bytecodes[i]) 2515 hasher.Read(hash) 2516 2517 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2518 j++ 2519 } 2520 if j < len(req.hashes) { 2521 codes[j] = bytecodes[i] 2522 j++ 2523 continue 2524 } 2525 // We've either run out of hashes, or got unrequested data 2526 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) 2527 // Signal this request as failed, and ready for rescheduling 2528 s.scheduleRevertBytecodeHealRequest(req) 2529 return errors.New("unexpected healing bytecode") 2530 } 2531 // Response validated, send it to the scheduler for filling 2532 response := &bytecodeHealResponse{ 2533 task: req.task, 2534 hashes: req.hashes, 2535 codes: codes, 2536 } 2537 select { 2538 case s.bytecodeHealResps <- response: 2539 case <-req.cancel: 2540 case <-req.stale: 2541 } 2542 return nil 2543 } 2544 2545 // hashSpace is the total size of the 256 bit hash space for accounts. 2546 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) 2547 2548 // report calculates various status reports and provides them to the user. 2549 func (s *Syncer) report(force bool) { 2550 if len(s.tasks) > 0 { 2551 s.reportSyncProgress(force) 2552 return 2553 } 2554 s.reportHealProgress(force) 2555 } 2556 2557 // reportSyncProgress calculates various status reports and provides them to the user. 
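// The completion estimate extrapolates the total state size from the fraction of the
// account hash space covered so far, so early figures are rough at best.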
2558 func (s *Syncer) reportSyncProgress(force bool) { 2559 // Don't report all the events, just occasionally 2560 if !force && time.Since(s.logTime) < 3*time.Second { 2561 return 2562 } 2563 // Don't report anything until we have meaningful progress 2564 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes 2565 if synced == 0 { 2566 return 2567 } 2568 accountGaps := new(big.Int) 2569 for _, task := range s.tasks { 2570 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big())) 2571 } 2572 accountFills := new(big.Int).Sub(hashSpace, accountGaps) 2573 if accountFills.BitLen() == 0 { 2574 return 2575 } 2576 s.logTime = time.Now() 2577 estBytes := float64(new(big.Int).Div( 2578 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), 2579 accountFills, 2580 ).Uint64()) 2581 2582 elapsed := time.Since(s.startTime) 2583 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes) 2584 2585 // Create a mega progress report 2586 var ( 2587 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes) 2588 accounts = fmt.Sprintf("%d@%v", s.accountSynced, s.accountBytes.TerminalString()) 2589 storage = fmt.Sprintf("%d@%v", s.storageSynced, s.storageBytes.TerminalString()) 2590 bytecode = fmt.Sprintf("%d@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString()) 2591 ) 2592 log.Info("State sync in progress", "synced", progress, "state", synced, 2593 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) 2594 } 2595 2596 // reportHealProgress calculates various status reports and provides them to the user. 2597 func (s *Syncer) reportHealProgress(force bool) { 2598 // Don't report all the events, just occasionally 2599 if !force && time.Since(s.logTime) < 3*time.Second { 2600 return 2601 } 2602 s.logTime = time.Now() 2603 2604 // Create a mega progress report 2605 var ( 2606 trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString()) 2607 bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) 2608 ) 2609 log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, 2610 "pending", s.healer.scheduler.Pending()) 2611 }