github.com/MikyChow/arbitrum-go-ethereum@v0.0.0-20230306102812-078da49636de/eth/protocols/snap/sync.go

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package snap
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	gomath "math"
    25  	"math/big"
    26  	"math/rand"
    27  	"sort"
    28  	"sync"
    29  	"sync/atomic"
    30  	"time"
    31  
    32  	"github.com/MikyChow/arbitrum-go-ethereum/common"
    33  	"github.com/MikyChow/arbitrum-go-ethereum/common/math"
    34  	"github.com/MikyChow/arbitrum-go-ethereum/core/rawdb"
    35  	"github.com/MikyChow/arbitrum-go-ethereum/core/state"
    36  	"github.com/MikyChow/arbitrum-go-ethereum/core/state/snapshot"
    37  	"github.com/MikyChow/arbitrum-go-ethereum/core/types"
    38  	"github.com/MikyChow/arbitrum-go-ethereum/crypto"
    39  	"github.com/MikyChow/arbitrum-go-ethereum/ethdb"
    40  	"github.com/MikyChow/arbitrum-go-ethereum/event"
    41  	"github.com/MikyChow/arbitrum-go-ethereum/light"
    42  	"github.com/MikyChow/arbitrum-go-ethereum/log"
    43  	"github.com/MikyChow/arbitrum-go-ethereum/p2p/msgrate"
    44  	"github.com/MikyChow/arbitrum-go-ethereum/rlp"
    45  	"github.com/MikyChow/arbitrum-go-ethereum/trie"
    46  	"golang.org/x/crypto/sha3"
    47  )
    48  
    49  var (
    50  	// emptyRoot is the known root hash of an empty trie.
    51  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    52  
    53  	// emptyCode is the known hash of the empty EVM bytecode.
    54  	emptyCode = crypto.Keccak256Hash(nil)
    55  )
    56  
    57  const (
    58  	// minRequestSize is the minimum number of bytes to request from a remote peer.
    59  	// This number is used as the low cap for account and storage range requests.
    60  	// Bytecode and trienode are limited inherently by item count (1).
    61  	minRequestSize = 64 * 1024
    62  
    63  	// maxRequestSize is the maximum number of bytes to request from a remote peer.
    64  	// This number is used as the high cap for account and storage range requests.
    65  	// Bytecode and trienode are limited more explicitly by the caps below.
    66  	maxRequestSize = 512 * 1024
    67  
    68  	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
    69  	// single query. If this number is too low, we're not filling responses fully
    70  	// and waste round trip times. If it's too high, we're capping responses and
    71  	// waste bandwidth.
    72  	//
    73  	// Deployed bytecodes are currently capped at 24KB, so the minimum request
    74  	// size should be maxRequestSize / 24K. Assuming that most contracts do not
    75  	// come close to that, requesting 4x should be a good approximation.
    76  	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4
    77  
    78  	// maxTrieRequestCount is the maximum number of trie node blobs to request in
    79  	// a single query. If this number is too low, we're not filling responses fully
    80  	// and waste round trip times. If it's too high, we're capping responses and
    81  	// waste bandwidth.
    82  	maxTrieRequestCount = maxRequestSize / 512
    83  
    84  	// trienodeHealRateMeasurementImpact is the impact a single measurement has on
    85  	// the local node's trienode processing capacity. A value closer to 0 reacts
    86  	// slower to sudden changes, but it is also more stable against temporary hiccups.
    87  	trienodeHealRateMeasurementImpact = 0.005
    88  
    89  	// minTrienodeHealThrottle is the minimum divisor for throttling trie node
    90  	// heal requests to avoid overloading the local node and excessively expanding
    91  	// the state trie breadth-wise.
    92  	minTrienodeHealThrottle = 1
    93  
    94  	// maxTrienodeHealThrottle is the maximum divisor for throttling trie node
    95  	// heal requests to avoid overloading the local node and excessively expanding
    96  	// the state trie breadth-wise.
    97  	maxTrienodeHealThrottle = maxTrieRequestCount
    98  
    99  	// trienodeHealThrottleIncrease is the multiplier for the throttle when the
   100  	// rate of arriving data is higher than the rate of processing it.
   101  	trienodeHealThrottleIncrease = 1.33
   102  
   103  	// trienodeHealThrottleDecrease is the divisor for the throttle when the
   104  	// rate of arriving data is lower than the rate of processing it.
   105  	trienodeHealThrottleDecrease = 1.25
   106  )
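
// One way the constants above could fit together (an illustrative sketch, not
// part of the original file): treat the throttle as a divisor on the number of
// trie nodes requested per heal query, raise it when data arrives faster than
// it can be processed, lower it otherwise, and clamp it to the bounds above.
// The helper name and its rate parameters are hypothetical:
//
//	func adjustHealThrottle(throttle, arriveRate, processRate float64) float64 {
//		if arriveRate > processRate {
//			throttle *= trienodeHealThrottleIncrease // shrink future requests
//		} else {
//			throttle /= trienodeHealThrottleDecrease // grow future requests
//		}
//		return gomath.Min(gomath.Max(throttle, minTrienodeHealThrottle), maxTrienodeHealThrottle)
//	}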
   107  
   108  var (
   109  	// accountConcurrency is the number of chunks to split the account trie into
   110  	// to allow concurrent retrievals.
   111  	accountConcurrency = 16
   112  
   113  	// storageConcurrency is the number of chunks to split a large contract
   114  	// storage trie into to allow concurrent retrievals.
   115  	storageConcurrency = 16
   116  )
   117  
   118  // ErrCancelled is returned from snap syncing if the operation was prematurely
   119  // terminated.
   120  var ErrCancelled = errors.New("sync cancelled")
   121  
   122  // accountRequest tracks a pending account range request to ensure responses are
   123  // to actual requests and to validate any security constraints.
   124  //
   125  // Concurrency note: account requests and responses are handled concurrently from
   126  // the main runloop to allow Merkle proof verifications on the peer's thread and
   127  // to drop on invalid response. The request struct must contain all the data to
   128  // construct the response without accessing runloop internals (i.e. task). That
   129  // is only included to allow the runloop to match a response to the task being
   130  // synced without having yet another set of maps.
   131  type accountRequest struct {
   132  	peer string    // Peer to which this request is assigned
   133  	id   uint64    // Request ID of this request
   134  	time time.Time // Timestamp when the request was sent
   135  
   136  	deliver chan *accountResponse // Channel to deliver successful response on
   137  	revert  chan *accountRequest  // Channel to deliver request failure on
   138  	cancel  chan struct{}         // Channel to track sync cancellation
   139  	timeout *time.Timer           // Timer to track delivery timeout
   140  	stale   chan struct{}         // Channel to signal the request was dropped
   141  
   142  	origin common.Hash // First account requested to allow continuation checks
   143  	limit  common.Hash // Last account requested to allow non-overlapping chunking
   144  
   145  	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
   146  }
   147  
   148  // accountResponse is an already Merkle-verified remote response to an account
   149  // range request. It contains the subtrie for the requested account range and
   150  // the database that's going to be filled with the internal nodes on commit.
   151  type accountResponse struct {
   152  	task *accountTask // Task which this request is filling
   153  
   154  	hashes   []common.Hash         // Account hashes in the returned range
   155  	accounts []*types.StateAccount // Expanded accounts in the returned range
   156  
   157  	cont bool // Whether the account range has a continuation
   158  }
   159  
   160  // bytecodeRequest tracks a pending bytecode request to ensure responses are to
   161  // actual requests and to validate any security constraints.
   162  //
   163  // Concurrency note: bytecode requests and responses are handled concurrently from
   164  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   165  // to drop on invalid response. The request struct must contain all the data to
   166  // construct the response without accessing runloop internals (i.e. task). That
   167  // is only included to allow the runloop to match a response to the task being
   168  // synced without having yet another set of maps.
   169  type bytecodeRequest struct {
   170  	peer string    // Peer to which this request is assigned
   171  	id   uint64    // Request ID of this request
   172  	time time.Time // Timestamp when the request was sent
   173  
   174  	deliver chan *bytecodeResponse // Channel to deliver successful response on
   175  	revert  chan *bytecodeRequest  // Channel to deliver request failure on
   176  	cancel  chan struct{}          // Channel to track sync cancellation
   177  	timeout *time.Timer            // Timer to track delivery timeout
   178  	stale   chan struct{}          // Channel to signal the request was dropped
   179  
   180  	hashes []common.Hash // Bytecode hashes to validate responses
   181  	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
   182  }
   183  
   184  // bytecodeResponse is an already verified remote response to a bytecode request.
   185  type bytecodeResponse struct {
   186  	task *accountTask // Task which this request is filling
   187  
   188  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   189  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   190  }
   191  
   192  // storageRequest tracks a pending storage ranges request to ensure responses are
   193  // to actual requests and to validate any security constraints.
   194  //
   195  // Concurrency note: storage requests and responses are handled concurrently from
   196  // the main runloop to allow Merkle proof verifications on the peer's thread and
   197  // to drop on invalid response. The request struct must contain all the data to
   198  // construct the response without accessing runloop internals (i.e. tasks). That
   199  // is only included to allow the runloop to match a response to the task being
   200  // synced without having yet another set of maps.
   201  type storageRequest struct {
   202  	peer string    // Peer to which this request is assigned
   203  	id   uint64    // Request ID of this request
   204  	time time.Time // Timestamp when the request was sent
   205  
   206  	deliver chan *storageResponse // Channel to deliver successful response on
   207  	revert  chan *storageRequest  // Channel to deliver request failure on
   208  	cancel  chan struct{}         // Channel to track sync cancellation
   209  	timeout *time.Timer           // Timer to track delivery timeout
   210  	stale   chan struct{}         // Channel to signal the request was dropped
   211  
   212  	accounts []common.Hash // Account hashes to validate responses
   213  	roots    []common.Hash // Storage roots to validate responses
   214  
   215  	origin common.Hash // First storage slot requested to allow continuation checks
   216  	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking
   217  
   218  	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
   219  	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
   220  }
   221  
   222  // storageResponse is an already Merkle-verified remote response to a storage
   223  // range request. It contains the subtries for the requested storage ranges and
   224  // the databases that are going to be filled with the internal nodes on commit.
   225  type storageResponse struct {
   226  	mainTask *accountTask // Task which this response belongs to
   227  	subTask  *storageTask // Task which this response is filling
   228  
   229  	accounts []common.Hash // Account hashes requested, may be only partially filled
   230  	roots    []common.Hash // Storage roots requested, may be only partially filled
   231  
   232  	hashes [][]common.Hash // Storage slot hashes in the returned range
   233  	slots  [][][]byte      // Storage slot values in the returned range
   234  
   235  	cont bool // Whether the last storage range has a continuation
   236  }
   237  
   238  // trienodeHealRequest tracks a pending state trie request to ensure responses
   239  // are to actual requests and to validate any security constraints.
   240  //
   241  // Concurrency note: trie node requests and responses are handled concurrently from
   242  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   243  // to drop on invalid response. The request struct must contain all the data to
   244  // construct the response without accessing runloop internals (i.e. task). That
   245  // is only included to allow the runloop to match a response to the task being
   246  // synced without having yet another set of maps.
   247  type trienodeHealRequest struct {
   248  	peer string    // Peer to which this request is assigned
   249  	id   uint64    // Request ID of this request
   250  	time time.Time // Timestamp when the request was sent
   251  
   252  	deliver chan *trienodeHealResponse // Channel to deliver successful response on
   253  	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
   254  	cancel  chan struct{}              // Channel to track sync cancellation
   255  	timeout *time.Timer                // Timer to track delivery timeout
   256  	stale   chan struct{}              // Channel to signal the request was dropped
   257  
   258  	paths  []string      // Trie node paths for identifying trie node
   259  	hashes []common.Hash // Trie node hashes to validate responses
   260  
   261  	task *healTask // Task which this request is filling (only access fields through the runloop!!)
   262  }
   263  
   264  // trienodeHealResponse is an already verified remote response to a trie node request.
   265  type trienodeHealResponse struct {
   266  	task *healTask // Task which this request is filling
   267  
   268  	paths  []string      // Paths of the trie nodes
   269  	hashes []common.Hash // Hashes of the trie nodes to avoid double hashing
   270  	nodes  [][]byte      // Actual trie nodes to store into the database (nil = missing)
   271  }
   272  
   273  // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
   274  // actual requests and to validate any security constraints.
   275  //
   276  // Concurrency note: bytecode requests and responses are handled concurrently from
   277  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   278  // to drop on invalid response. The request struct must contain all the data to
   279  // construct the response without accessing runloop internals (i.e. task). That
   280  // is only included to allow the runloop to match a response to the task being
   281  // synced without having yet another set of maps.
   282  type bytecodeHealRequest struct {
   283  	peer string    // Peer to which this request is assigned
   284  	id   uint64    // Request ID of this request
   285  	time time.Time // Timestamp when the request was sent
   286  
   287  	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
   288  	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
   289  	cancel  chan struct{}              // Channel to track sync cancellation
   290  	timeout *time.Timer                // Timer to track delivery timeout
   291  	stale   chan struct{}              // Channel to signal the request was dropped
   292  
   293  	hashes []common.Hash // Bytecode hashes to validate responses
   294  	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
   295  }
   296  
   297  // bytecodeHealResponse is an already verified remote response to a bytecode request.
   298  type bytecodeHealResponse struct {
   299  	task *healTask // Task which this request is filling
   300  
   301  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   302  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   303  }
   304  
   305  // accountTask represents the sync task for a chunk of the account snapshot.
   306  type accountTask struct {
   307  	// These fields get serialized to leveldb on shutdown
   308  	Next     common.Hash                    // Next account to sync in this interval
   309  	Last     common.Hash                    // Last account to sync in this interval
   310  	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
   311  
   312  	// These fields are internals used during runtime
   313  	req  *accountRequest  // Pending request to fill this task
   314  	res  *accountResponse // Validated response filling this task
   315  	pend int              // Number of pending subtasks for this round
   316  
   317  	needCode  []bool // Flags whether the filling accounts need code retrieval
   318  	needState []bool // Flags whether the filling accounts need storage retrieval
   319  	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing
   320  
   321  	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
   322  	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
   323  
   324  	genBatch ethdb.Batch     // Batch used by the node generator
   325  	genTrie  *trie.StackTrie // Node generator from storage slots
   326  
   327  	done bool // Flag whether the task can be removed
   328  }
   329  
   330  // storageTask represents the sync task for a chunk of the storage snapshot.
   331  type storageTask struct {
   332  	Next common.Hash // Next storage slot to sync in this interval
   333  	Last common.Hash // Last storage slot to sync in this interval
   334  
   335  	// These fields are internals used during runtime
   336  	root common.Hash     // Storage root hash for this instance
   337  	req  *storageRequest // Pending request to fill this task
   338  
   339  	genBatch ethdb.Batch     // Batch used by the node generator
   340  	genTrie  *trie.StackTrie // Node generator from storage slots
   341  
   342  	done bool // Flag whether the task can be removed
   343  }
   344  
   345  // healTask represents the sync task for healing the snap-synced chunk boundaries.
   346  type healTask struct {
   347  	scheduler *trie.Sync // State trie sync scheduler defining the tasks
   348  
   349  	trieTasks map[string]common.Hash   // Set of trie node tasks currently queued for retrieval, indexed by node path
   350  	codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash
   351  }
   352  
   353  // SyncProgress is a database entry to allow suspending and resuming a snapshot state
   354  // sync. As opposed to full and fast sync, there is no way to restart a suspended
   355  // snap sync without prior knowledge of the suspension point.
   356  type SyncProgress struct {
   357  	Tasks []*accountTask // The suspended account tasks (contract tasks within)
   358  
   359  	// Status report during syncing phase
   360  	AccountSynced  uint64             // Number of accounts downloaded
   361  	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   362  	BytecodeSynced uint64             // Number of bytecodes downloaded
   363  	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   364  	StorageSynced  uint64             // Number of storage slots downloaded
   365  	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   366  
   367  	// Status report during healing phase
   368  	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
   369  	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   370  	BytecodeHealSynced uint64             // Number of bytecodes downloaded
   371  	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   372  }
   373  
   374  // SyncPending is analogous to SyncProgress, but it's used to report on pending
   375  // ephemeral sync progress that doesn't get persisted into the database.
   376  type SyncPending struct {
   377  	TrienodeHeal uint64 // Number of state trie nodes pending
   378  	BytecodeHeal uint64 // Number of bytecodes pending
   379  }
   380  
   381  // SyncPeer abstracts out the methods required for a peer to be synced against
   382  // with the goal of allowing the construction of mock peers without the full
   383  // blown networking.
   384  type SyncPeer interface {
   385  	// ID retrieves the peer's unique identifier.
   386  	ID() string
   387  
   388  	// RequestAccountRange fetches a batch of accounts rooted in a specific account
   389  	// trie, starting with the origin.
   390  	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
   391  
   392  	// RequestStorageRanges fetches a batch of storage slots belonging to one or
   393  	// more accounts. If slots from only one account are requested, an origin marker
   394  	// may also be used to retrieve from there.
   395  	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
   396  
   397  	// RequestByteCodes fetches a batch of bytecodes by hash.
   398  	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
   399  
   400  	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
   401  	// a specific state trie.
   402  	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
   403  
   404  	// Log retrieves the peer's own contextual logger.
   405  	Log() log.Logger
   406  }
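
// Illustrative sketch (hypothetical, not part of the original file): the
// interface above is deliberately small, so tests can satisfy it with a no-op
// implementation such as the one below and feed canned data back through the
// syncer's delivery callbacks (defined elsewhere in this package).
//
//	type nullPeer struct{ id string }
//
//	func (p *nullPeer) ID() string      { return p.id }
//	func (p *nullPeer) Log() log.Logger { return log.New("peer", p.id) }
//
//	func (p *nullPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
//		return nil // a real mock would answer via the syncer's delivery methods
//	}
//	func (p *nullPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
//		return nil
//	}
//	func (p *nullPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
//		return nil
//	}
//	func (p *nullPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
//		return nil
//	}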
   407  
   408  // Syncer is an Ethereum account and storage trie syncer based on snapshots and
   409  // the snap protocol. Its purpose is to download all the accounts and storage
   410  // slots from remote peers and reassemble chunks of the state trie, on top of
   411  // which a state sync can be run to fix any gaps / overlaps.
   412  //
   413  // Every network request has a variety of failure events:
   414  //   - The peer disconnects after task assignment, failing to send the request
   415  //   - The peer disconnects after sending the request, before delivering on it
   416  //   - The peer remains connected, but does not deliver a response in time
   417  //   - The peer delivers a stale response after a previous timeout
   418  //   - The peer delivers a refusal to serve the requested state
   419  type Syncer struct {
   420  	db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup)
   421  
   422  	root    common.Hash    // Current state trie root being synced
   423  	tasks   []*accountTask // Current account task set being synced
   424  	snapped bool           // Flag to signal that snap phase is done
   425  	healer  *healTask      // Current state healing task being executed
   426  	update  chan struct{}  // Notification channel for possible sync progression
   427  
   428  	peers    map[string]SyncPeer // Currently active peers to download from
   429  	peerJoin *event.Feed         // Event feed to react to peers joining
   430  	peerDrop *event.Feed         // Event feed to react to peers dropping
   431  	rates    *msgrate.Trackers   // Message throughput rates for peers
   432  
   433  	// Request tracking during syncing phase
   434  	statelessPeers map[string]struct{} // Peers that failed to deliver state data
   435  	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
   436  	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   437  	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests
   438  
   439  	accountReqs  map[uint64]*accountRequest  // Account requests currently running
   440  	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
   441  	storageReqs  map[uint64]*storageRequest  // Storage requests currently running
   442  
   443  	accountSynced  uint64             // Number of accounts downloaded
   444  	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   445  	bytecodeSynced uint64             // Number of bytecodes downloaded
   446  	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   447  	storageSynced  uint64             // Number of storage slots downloaded
   448  	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   449  
   450  	extProgress *SyncProgress // Progress that can be exposed to external callers
   451  
   452  	// Request tracking during healing phase
   453  	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
   454  	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   455  
   456  	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
   457  	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
   458  
   459  	trienodeHealRate      float64   // Average heal rate for processing trie node data
   460  	trienodeHealPend      uint64    // Number of trie nodes currently pending for processing
   461  	trienodeHealThrottle  float64   // Divisor for throttling the amount of trienode heal data requested
   462  	trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated
   463  
   464  	trienodeHealSynced uint64             // Number of state trie nodes downloaded
   465  	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   466  	trienodeHealDups   uint64             // Number of state trie nodes already processed
   467  	trienodeHealNops   uint64             // Number of state trie nodes not requested
   468  	bytecodeHealSynced uint64             // Number of bytecodes downloaded
   469  	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   470  	bytecodeHealDups   uint64             // Number of bytecodes already processed
   471  	bytecodeHealNops   uint64             // Number of bytecodes not requested
   472  
   473  	stateWriter        ethdb.Batch        // Shared batch writer used for persisting raw states
   474  	accountHealed      uint64             // Number of accounts downloaded during the healing stage
   475  	accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage
   476  	storageHealed      uint64             // Number of storage slots downloaded during the healing stage
   477  	storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage
   478  
   479  	startTime time.Time // Time instance when snapshot sync started
   480  	logTime   time.Time // Time instance when status was last reported
   481  
   482  	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
   483  	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
   484  }
   485  
   486  // NewSyncer creates a new snapshot syncer to download the Ethereum state over the
   487  // snap protocol.
   488  func NewSyncer(db ethdb.KeyValueStore) *Syncer {
   489  	return &Syncer{
   490  		db: db,
   491  
   492  		peers:    make(map[string]SyncPeer),
   493  		peerJoin: new(event.Feed),
   494  		peerDrop: new(event.Feed),
   495  		rates:    msgrate.NewTrackers(log.New("proto", "snap")),
   496  		update:   make(chan struct{}, 1),
   497  
   498  		accountIdlers:  make(map[string]struct{}),
   499  		storageIdlers:  make(map[string]struct{}),
   500  		bytecodeIdlers: make(map[string]struct{}),
   501  
   502  		accountReqs:  make(map[uint64]*accountRequest),
   503  		storageReqs:  make(map[uint64]*storageRequest),
   504  		bytecodeReqs: make(map[uint64]*bytecodeRequest),
   505  
   506  		trienodeHealIdlers: make(map[string]struct{}),
   507  		bytecodeHealIdlers: make(map[string]struct{}),
   508  
   509  		trienodeHealReqs:     make(map[uint64]*trienodeHealRequest),
   510  		bytecodeHealReqs:     make(map[uint64]*bytecodeHealRequest),
   511  		trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk
   512  		stateWriter:          db.NewBatch(),
   513  
   514  		extProgress: new(SyncProgress),
   515  	}
   516  }
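
// Illustrative usage sketch (hypothetical caller, not part of the original
// file), assuming an open ethdb.KeyValueStore `db`, a connected SyncPeer
// `peer`, and a target state root `root`:
//
//	syncer := NewSyncer(db)
//	if err := syncer.Register(peer); err != nil {
//		log.Error("Failed to register snap peer", "err", err)
//	}
//	cancel := make(chan struct{})
//	if err := syncer.Sync(root, cancel); err != nil && err != ErrCancelled {
//		log.Error("Snap sync cycle failed", "err", err)
//	}
//	progress, pending := syncer.Progress()
//	log.Info("Snap sync status", "accounts", progress.AccountSynced, "healPending", pending.TrienodeHeal)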
   517  
   518  // Register injects a new data source into the syncer's peerset.
   519  func (s *Syncer) Register(peer SyncPeer) error {
   520  	// Make sure the peer is not registered yet
   521  	id := peer.ID()
   522  
   523  	s.lock.Lock()
   524  	if _, ok := s.peers[id]; ok {
   525  		log.Error("Snap peer already registered", "id", id)
   526  
   527  		s.lock.Unlock()
   528  		return errors.New("already registered")
   529  	}
   530  	s.peers[id] = peer
   531  	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))
   532  
   533  	// Mark the peer as idle, even if no sync is running
   534  	s.accountIdlers[id] = struct{}{}
   535  	s.storageIdlers[id] = struct{}{}
   536  	s.bytecodeIdlers[id] = struct{}{}
   537  	s.trienodeHealIdlers[id] = struct{}{}
   538  	s.bytecodeHealIdlers[id] = struct{}{}
   539  	s.lock.Unlock()
   540  
   541  	// Notify any active syncs that a new peer can be assigned data
   542  	s.peerJoin.Send(id)
   543  	return nil
   544  }
   545  
   546  // Unregister removes a data source from the syncer's peerset.
   547  func (s *Syncer) Unregister(id string) error {
   548  	// Remove all traces of the peer from the registry
   549  	s.lock.Lock()
   550  	if _, ok := s.peers[id]; !ok {
   551  		log.Error("Snap peer not registered", "id", id)
   552  
   553  		s.lock.Unlock()
   554  		return errors.New("not registered")
   555  	}
   556  	delete(s.peers, id)
   557  	s.rates.Untrack(id)
   558  
   559  	// Remove status markers, even if no sync is running
   560  	delete(s.statelessPeers, id)
   561  
   562  	delete(s.accountIdlers, id)
   563  	delete(s.storageIdlers, id)
   564  	delete(s.bytecodeIdlers, id)
   565  	delete(s.trienodeHealIdlers, id)
   566  	delete(s.bytecodeHealIdlers, id)
   567  	s.lock.Unlock()
   568  
   569  	// Notify any active syncs that pending requests need to be reverted
   570  	s.peerDrop.Send(id)
   571  	return nil
   572  }
   573  
   574  // Sync starts (or resumes a previous) sync cycle to iterate over a state trie
   575  // with the given root and reconstruct the nodes based on the snapshot leaves.
   576  // Previously downloaded segments will not be redownloaded or fixed; rather, any
   577  // errors will be healed after the leaves are fully accumulated.
   578  func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
   579  	// Move the trie root from any previous value, revert stateless markers for
   580  	// any peers and initialize the syncer if it was not yet run
   581  	s.lock.Lock()
   582  	s.root = root
   583  	s.healer = &healTask{
   584  		scheduler: state.NewStateSync(root, s.db, s.onHealState),
   585  		trieTasks: make(map[string]common.Hash),
   586  		codeTasks: make(map[common.Hash]struct{}),
   587  	}
   588  	s.statelessPeers = make(map[string]struct{})
   589  	s.lock.Unlock()
   590  
   591  	if s.startTime == (time.Time{}) {
   592  		s.startTime = time.Now()
   593  	}
   594  	// Retrieve the previous sync status from LevelDB and abort if already synced
   595  	s.loadSyncStatus()
   596  	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   597  		log.Debug("Snapshot sync already completed")
   598  		return nil
   599  	}
   600  	defer func() { // Persist any progress, independent of failure
   601  		for _, task := range s.tasks {
   602  			s.forwardAccountTask(task)
   603  		}
   604  		s.cleanAccountTasks()
   605  		s.saveSyncStatus()
   606  	}()
   607  
   608  	log.Debug("Starting snapshot sync cycle", "root", root)
   609  
   610  	// Flush out the last committed raw states
   611  	defer func() {
   612  		if s.stateWriter.ValueSize() > 0 {
   613  			s.stateWriter.Write()
   614  			s.stateWriter.Reset()
   615  		}
   616  	}()
   617  	defer s.report(true)
   618  
   619  	// Whether sync completed or not, disregard any future packets
   620  	defer func() {
   621  		log.Debug("Terminating snapshot sync cycle", "root", root)
   622  		s.lock.Lock()
   623  		s.accountReqs = make(map[uint64]*accountRequest)
   624  		s.storageReqs = make(map[uint64]*storageRequest)
   625  		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
   626  		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
   627  		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
   628  		s.lock.Unlock()
   629  	}()
   630  	// Keep scheduling sync tasks
   631  	peerJoin := make(chan string, 16)
   632  	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
   633  	defer peerJoinSub.Unsubscribe()
   634  
   635  	peerDrop := make(chan string, 16)
   636  	peerDropSub := s.peerDrop.Subscribe(peerDrop)
   637  	defer peerDropSub.Unsubscribe()
   638  
   639  	// Create a set of unique channels for this sync cycle. We need these to be
   640  	// ephemeral so a data race doesn't accidentally deliver something stale on
   641  	// a persistent channel across syncs (yup, this happened)
   642  	var (
   643  		accountReqFails      = make(chan *accountRequest)
   644  		storageReqFails      = make(chan *storageRequest)
   645  		bytecodeReqFails     = make(chan *bytecodeRequest)
   646  		accountResps         = make(chan *accountResponse)
   647  		storageResps         = make(chan *storageResponse)
   648  		bytecodeResps        = make(chan *bytecodeResponse)
   649  		trienodeHealReqFails = make(chan *trienodeHealRequest)
   650  		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
   651  		trienodeHealResps    = make(chan *trienodeHealResponse)
   652  		bytecodeHealResps    = make(chan *bytecodeHealResponse)
   653  	)
   654  	for {
   655  		// Remove all completed tasks and terminate sync if everything's done
   656  		s.cleanStorageTasks()
   657  		s.cleanAccountTasks()
   658  		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   659  			return nil
   660  		}
   661  		// Assign all the data retrieval tasks to any free peers
   662  		s.assignAccountTasks(accountResps, accountReqFails, cancel)
   663  		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
   664  		s.assignStorageTasks(storageResps, storageReqFails, cancel)
   665  
   666  		if len(s.tasks) == 0 {
   667  			// Sync phase done, run heal phase
   668  			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
   669  			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
   670  		}
   671  		// Update sync progress
   672  		s.lock.Lock()
   673  		s.extProgress = &SyncProgress{
   674  			AccountSynced:      s.accountSynced,
   675  			AccountBytes:       s.accountBytes,
   676  			BytecodeSynced:     s.bytecodeSynced,
   677  			BytecodeBytes:      s.bytecodeBytes,
   678  			StorageSynced:      s.storageSynced,
   679  			StorageBytes:       s.storageBytes,
   680  			TrienodeHealSynced: s.trienodeHealSynced,
   681  			TrienodeHealBytes:  s.trienodeHealBytes,
   682  			BytecodeHealSynced: s.bytecodeHealSynced,
   683  			BytecodeHealBytes:  s.bytecodeHealBytes,
   684  		}
   685  		s.lock.Unlock()
   686  		// Wait for something to happen
   687  		select {
   688  		case <-s.update:
   689  			// Something happened (new peer, delivery, timeout), recheck tasks
   690  		case <-peerJoin:
   691  			// A new peer joined, try to schedule it new tasks
   692  		case id := <-peerDrop:
   693  			s.revertRequests(id)
   694  		case <-cancel:
   695  			return ErrCancelled
   696  
   697  		case req := <-accountReqFails:
   698  			s.revertAccountRequest(req)
   699  		case req := <-bytecodeReqFails:
   700  			s.revertBytecodeRequest(req)
   701  		case req := <-storageReqFails:
   702  			s.revertStorageRequest(req)
   703  		case req := <-trienodeHealReqFails:
   704  			s.revertTrienodeHealRequest(req)
   705  		case req := <-bytecodeHealReqFails:
   706  			s.revertBytecodeHealRequest(req)
   707  
   708  		case res := <-accountResps:
   709  			s.processAccountResponse(res)
   710  		case res := <-bytecodeResps:
   711  			s.processBytecodeResponse(res)
   712  		case res := <-storageResps:
   713  			s.processStorageResponse(res)
   714  		case res := <-trienodeHealResps:
   715  			s.processTrienodeHealResponse(res)
   716  		case res := <-bytecodeHealResps:
   717  			s.processBytecodeHealResponse(res)
   718  		}
   719  		// Report stats if something meaningful happened
   720  		s.report(false)
   721  	}
   722  }
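
// Cancellation sketch (hypothetical caller, not part of the original file):
// closing the cancel channel is the only external way to abort the loop above.
// Sync then returns ErrCancelled, and a later Sync call resumes from the
// progress persisted by saveSyncStatus.
//
//	cancel := make(chan struct{})
//	done := make(chan error, 1)
//	go func() { done <- syncer.Sync(root, cancel) }()
//
//	// ... when the cycle must be aborted (e.g. a new pivot root was chosen):
//	close(cancel)
//	if err := <-done; err == ErrCancelled {
//		// expected: progress was saved and the next Sync call picks it up
//	}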
   723  
   724  // loadSyncStatus retrieves a previously aborted sync status from the database,
   725  // or generates a fresh one if none is available.
   726  func (s *Syncer) loadSyncStatus() {
   727  	var progress SyncProgress
   728  
   729  	if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil {
   730  		if err := json.Unmarshal(status, &progress); err != nil {
   731  			log.Error("Failed to decode snap sync status", "err", err)
   732  		} else {
   733  			for _, task := range progress.Tasks {
   734  				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
   735  			}
   736  			s.tasks = progress.Tasks
   737  			for _, task := range s.tasks {
   738  				task.genBatch = ethdb.HookedBatch{
   739  					Batch: s.db.NewBatch(),
   740  					OnPut: func(key []byte, value []byte) {
   741  						s.accountBytes += common.StorageSize(len(key) + len(value))
   742  					},
   743  				}
   744  				task.genTrie = trie.NewStackTrie(task.genBatch)
   745  
   746  				for accountHash, subtasks := range task.SubTasks {
   747  					for _, subtask := range subtasks {
   748  						subtask.genBatch = ethdb.HookedBatch{
   749  							Batch: s.db.NewBatch(),
   750  							OnPut: func(key []byte, value []byte) {
   751  								s.storageBytes += common.StorageSize(len(key) + len(value))
   752  							},
   753  						}
   754  						subtask.genTrie = trie.NewStackTrieWithOwner(subtask.genBatch, accountHash)
   755  					}
   756  				}
   757  			}
   758  			s.lock.Lock()
   759  			defer s.lock.Unlock()
   760  
   761  			s.snapped = len(s.tasks) == 0
   762  
   763  			s.accountSynced = progress.AccountSynced
   764  			s.accountBytes = progress.AccountBytes
   765  			s.bytecodeSynced = progress.BytecodeSynced
   766  			s.bytecodeBytes = progress.BytecodeBytes
   767  			s.storageSynced = progress.StorageSynced
   768  			s.storageBytes = progress.StorageBytes
   769  
   770  			s.trienodeHealSynced = progress.TrienodeHealSynced
   771  			s.trienodeHealBytes = progress.TrienodeHealBytes
   772  			s.bytecodeHealSynced = progress.BytecodeHealSynced
   773  			s.bytecodeHealBytes = progress.BytecodeHealBytes
   774  			return
   775  		}
   776  	}
   777  	// Either we've failed to decode the previous state, or there was none.
   778  	// Start a fresh sync by chunking up the account range and scheduling
   779  	// them for retrieval.
   780  	s.tasks = nil
   781  	s.accountSynced, s.accountBytes = 0, 0
   782  	s.bytecodeSynced, s.bytecodeBytes = 0, 0
   783  	s.storageSynced, s.storageBytes = 0, 0
   784  	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
   785  	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0
   786  
   787  	var next common.Hash
   788  	step := new(big.Int).Sub(
   789  		new(big.Int).Div(
   790  			new(big.Int).Exp(common.Big2, common.Big256, nil),
   791  			big.NewInt(int64(accountConcurrency)),
   792  		), common.Big1,
   793  	)
   794  	for i := 0; i < accountConcurrency; i++ {
   795  		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
   796  		if i == accountConcurrency-1 {
   797  			// Make sure we don't overflow if the step is not a proper divisor
   798  			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
   799  		}
   800  		batch := ethdb.HookedBatch{
   801  			Batch: s.db.NewBatch(),
   802  			OnPut: func(key []byte, value []byte) {
   803  				s.accountBytes += common.StorageSize(len(key) + len(value))
   804  			},
   805  		}
   806  		s.tasks = append(s.tasks, &accountTask{
   807  			Next:     next,
   808  			Last:     last,
   809  			SubTasks: make(map[common.Hash][]*storageTask),
   810  			genBatch: batch,
   811  			genTrie:  trie.NewStackTrie(batch),
   812  		})
   813  		log.Debug("Created account sync task", "from", next, "last", last)
   814  		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
   815  	}
   816  }
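
// Worked example (illustrative, not part of the original file): with the
// default accountConcurrency of 16, step = 2^256/16 - 1, so a fresh sync is
// chunked into the hash ranges [0x00..00, 0x0f..ff], [0x10..00, 0x1f..ff] and
// so on, with the last task's upper bound clamped to 0xff..ff so that rounding
// can never leave a gap at the top of the account hash space.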
   817  
   818  // saveSyncStatus marshals the remaining sync tasks into leveldb.
   819  func (s *Syncer) saveSyncStatus() {
   820  	// Serialize any partial progress to disk before spinning down
   821  	for _, task := range s.tasks {
   822  		if err := task.genBatch.Write(); err != nil {
   823  			log.Error("Failed to persist account slots", "err", err)
   824  		}
   825  		for _, subtasks := range task.SubTasks {
   826  			for _, subtask := range subtasks {
   827  				if err := subtask.genBatch.Write(); err != nil {
   828  					log.Error("Failed to persist storage slots", "err", err)
   829  				}
   830  			}
   831  		}
   832  	}
   833  	// Store the actual progress markers
   834  	progress := &SyncProgress{
   835  		Tasks:              s.tasks,
   836  		AccountSynced:      s.accountSynced,
   837  		AccountBytes:       s.accountBytes,
   838  		BytecodeSynced:     s.bytecodeSynced,
   839  		BytecodeBytes:      s.bytecodeBytes,
   840  		StorageSynced:      s.storageSynced,
   841  		StorageBytes:       s.storageBytes,
   842  		TrienodeHealSynced: s.trienodeHealSynced,
   843  		TrienodeHealBytes:  s.trienodeHealBytes,
   844  		BytecodeHealSynced: s.bytecodeHealSynced,
   845  		BytecodeHealBytes:  s.bytecodeHealBytes,
   846  	}
   847  	status, err := json.Marshal(progress)
   848  	if err != nil {
   849  		panic(err) // This can only fail during implementation
   850  	}
   851  	rawdb.WriteSnapshotSyncStatus(s.db, status)
   852  }
   853  
   854  // Progress returns the snap sync status statistics.
   855  func (s *Syncer) Progress() (*SyncProgress, *SyncPending) {
   856  	s.lock.Lock()
   857  	defer s.lock.Unlock()
   858  	pending := new(SyncPending)
   859  	if s.healer != nil {
   860  		pending.TrienodeHeal = uint64(len(s.healer.trieTasks))
   861  		pending.BytecodeHeal = uint64(len(s.healer.codeTasks))
   862  	}
   863  	return s.extProgress, pending
   864  }
   865  
   866  // cleanAccountTasks removes account range retrieval tasks that have already been
   867  // completed.
   868  func (s *Syncer) cleanAccountTasks() {
   869  	// If the sync was already done before, don't even bother
   870  	if len(s.tasks) == 0 {
   871  		return
   872  	}
   873  	// Sync wasn't finished previously, check for any task that can be finalized
   874  	for i := 0; i < len(s.tasks); i++ {
   875  		if s.tasks[i].done {
   876  			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
   877  			i--
   878  		}
   879  	}
   880  	// If everything was just finalized, generate the account trie and start the heal phase
   881  	if len(s.tasks) == 0 {
   882  		s.lock.Lock()
   883  		s.snapped = true
   884  		s.lock.Unlock()
   885  
   886  		// Push the final sync report
   887  		s.reportSyncProgress(true)
   888  	}
   889  }
   890  
   891  // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
   892  // within, cleaning any that have been completed.
   893  func (s *Syncer) cleanStorageTasks() {
   894  	for _, task := range s.tasks {
   895  		for account, subtasks := range task.SubTasks {
   896  			// Remove storage range retrieval tasks that completed
   897  			for j := 0; j < len(subtasks); j++ {
   898  				if subtasks[j].done {
   899  					subtasks = append(subtasks[:j], subtasks[j+1:]...)
   900  					j--
   901  				}
   902  			}
   903  			if len(subtasks) > 0 {
   904  				task.SubTasks[account] = subtasks
   905  				continue
   906  			}
   907  			// If all storage chunks are done, mark the account as done too
   908  			for j, hash := range task.res.hashes {
   909  				if hash == account {
   910  					task.needState[j] = false
   911  				}
   912  			}
   913  			delete(task.SubTasks, account)
   914  			task.pend--
   915  
   916  			// If this was the last pending task, forward the account task
   917  			if task.pend == 0 {
   918  				s.forwardAccountTask(task)
   919  			}
   920  		}
   921  	}
   922  }
   923  
   924  // assignAccountTasks attempts to match idle peers to pending account range
   925  // retrievals.
   926  func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
   927  	s.lock.Lock()
   928  	defer s.lock.Unlock()
   929  
   930  	// Sort the peers by download capacity to use faster ones if many available
   931  	idlers := &capacitySort{
   932  		ids:  make([]string, 0, len(s.accountIdlers)),
   933  		caps: make([]int, 0, len(s.accountIdlers)),
   934  	}
   935  	targetTTL := s.rates.TargetTimeout()
   936  	for id := range s.accountIdlers {
   937  		if _, ok := s.statelessPeers[id]; ok {
   938  			continue
   939  		}
   940  		idlers.ids = append(idlers.ids, id)
   941  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
   942  	}
   943  	if len(idlers.ids) == 0 {
   944  		return
   945  	}
   946  	sort.Sort(sort.Reverse(idlers))
   947  
   948  	// Iterate over all the tasks and try to find a pending one
   949  	for _, task := range s.tasks {
   950  		// Skip any tasks already filling
   951  		if task.req != nil || task.res != nil {
   952  			continue
   953  		}
   954  		// Task pending retrieval, try to find an idle peer. If no such peer
   955  		// exists, we probably assigned tasks for all (or they are stateless).
   956  		// Abort the entire assignment mechanism.
   957  		if len(idlers.ids) == 0 {
   958  			return
   959  		}
   960  		var (
   961  			idle = idlers.ids[0]
   962  			peer = s.peers[idle]
   963  			cap  = idlers.caps[0]
   964  		)
   965  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
   966  
   967  		// Matched a pending task to an idle peer, allocate a unique request id
   968  		var reqid uint64
   969  		for {
   970  			reqid = uint64(rand.Int63())
   971  			if reqid == 0 {
   972  				continue
   973  			}
   974  			if _, ok := s.accountReqs[reqid]; ok {
   975  				continue
   976  			}
   977  			break
   978  		}
   979  		// Generate the network query and send it to the peer
   980  		req := &accountRequest{
   981  			peer:    idle,
   982  			id:      reqid,
   983  			time:    time.Now(),
   984  			deliver: success,
   985  			revert:  fail,
   986  			cancel:  cancel,
   987  			stale:   make(chan struct{}),
   988  			origin:  task.Next,
   989  			limit:   task.Last,
   990  			task:    task,
   991  		}
   992  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
   993  			peer.Log().Debug("Account range request timed out", "reqid", reqid)
   994  			s.rates.Update(idle, AccountRangeMsg, 0, 0)
   995  			s.scheduleRevertAccountRequest(req)
   996  		})
   997  		s.accountReqs[reqid] = req
   998  		delete(s.accountIdlers, idle)
   999  
  1000  		s.pend.Add(1)
  1001  		go func(root common.Hash) {
  1002  			defer s.pend.Done()
  1003  
  1004  			// Attempt to send the remote request and revert if it fails
  1005  			if cap > maxRequestSize {
  1006  				cap = maxRequestSize
  1007  			}
  1008  			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
  1009  				cap = minRequestSize
  1010  			}
  1011  			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
  1012  				peer.Log().Debug("Failed to request account range", "err", err)
  1013  				s.scheduleRevertAccountRequest(req)
  1014  			}
  1015  		}(s.root)
  1016  
  1017  		// Inject the request into the task to block further assignments
  1018  		task.req = req
  1019  	}
  1020  }
  1021  
  1022  // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
  1023  func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
  1024  	s.lock.Lock()
  1025  	defer s.lock.Unlock()
  1026  
  1027  	// Sort the peers by download capacity to use faster ones if many available
  1028  	idlers := &capacitySort{
  1029  		ids:  make([]string, 0, len(s.bytecodeIdlers)),
  1030  		caps: make([]int, 0, len(s.bytecodeIdlers)),
  1031  	}
  1032  	targetTTL := s.rates.TargetTimeout()
  1033  	for id := range s.bytecodeIdlers {
  1034  		if _, ok := s.statelessPeers[id]; ok {
  1035  			continue
  1036  		}
  1037  		idlers.ids = append(idlers.ids, id)
  1038  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1039  	}
  1040  	if len(idlers.ids) == 0 {
  1041  		return
  1042  	}
  1043  	sort.Sort(sort.Reverse(idlers))
  1044  
  1045  	// Iterate over all the tasks and try to find a pending one
  1046  	for _, task := range s.tasks {
  1047  		// Skip any tasks not in the bytecode retrieval phase
  1048  		if task.res == nil {
  1049  			continue
  1050  		}
  1051  		// Skip tasks that are already retrieving (or done with) all codes
  1052  		if len(task.codeTasks) == 0 {
  1053  			continue
  1054  		}
  1055  		// Task pending retrieval, try to find an idle peer. If no such peer
  1056  		// exists, we probably assigned tasks for all (or they are stateless).
  1057  		// Abort the entire assignment mechanism.
  1058  		if len(idlers.ids) == 0 {
  1059  			return
  1060  		}
  1061  		var (
  1062  			idle = idlers.ids[0]
  1063  			peer = s.peers[idle]
  1064  			cap  = idlers.caps[0]
  1065  		)
  1066  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1067  
  1068  		// Matched a pending task to an idle peer, allocate a unique request id
  1069  		var reqid uint64
  1070  		for {
  1071  			reqid = uint64(rand.Int63())
  1072  			if reqid == 0 {
  1073  				continue
  1074  			}
  1075  			if _, ok := s.bytecodeReqs[reqid]; ok {
  1076  				continue
  1077  			}
  1078  			break
  1079  		}
  1080  		// Generate the network query and send it to the peer
  1081  		if cap > maxCodeRequestCount {
  1082  			cap = maxCodeRequestCount
  1083  		}
  1084  		hashes := make([]common.Hash, 0, cap)
  1085  		for hash := range task.codeTasks {
  1086  			delete(task.codeTasks, hash)
  1087  			hashes = append(hashes, hash)
  1088  			if len(hashes) >= cap {
  1089  				break
  1090  			}
  1091  		}
  1092  		req := &bytecodeRequest{
  1093  			peer:    idle,
  1094  			id:      reqid,
  1095  			time:    time.Now(),
  1096  			deliver: success,
  1097  			revert:  fail,
  1098  			cancel:  cancel,
  1099  			stale:   make(chan struct{}),
  1100  			hashes:  hashes,
  1101  			task:    task,
  1102  		}
  1103  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1104  			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
  1105  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1106  			s.scheduleRevertBytecodeRequest(req)
  1107  		})
  1108  		s.bytecodeReqs[reqid] = req
  1109  		delete(s.bytecodeIdlers, idle)
  1110  
  1111  		s.pend.Add(1)
  1112  		go func() {
  1113  			defer s.pend.Done()
  1114  
  1115  			// Attempt to send the remote request and revert if it fails
  1116  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1117  				log.Debug("Failed to request bytecodes", "err", err)
  1118  				s.scheduleRevertBytecodeRequest(req)
  1119  			}
  1120  		}()
  1121  	}
  1122  }
  1123  
  1124  // assignStorageTasks attempts to match idle peers to pending storage range
  1125  // retrievals.
  1126  func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
  1127  	s.lock.Lock()
  1128  	defer s.lock.Unlock()
  1129  
  1130  	// Sort the peers by download capacity to use faster ones if many available
  1131  	idlers := &capacitySort{
  1132  		ids:  make([]string, 0, len(s.storageIdlers)),
  1133  		caps: make([]int, 0, len(s.storageIdlers)),
  1134  	}
  1135  	targetTTL := s.rates.TargetTimeout()
  1136  	for id := range s.storageIdlers {
  1137  		if _, ok := s.statelessPeers[id]; ok {
  1138  			continue
  1139  		}
  1140  		idlers.ids = append(idlers.ids, id)
  1141  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
  1142  	}
  1143  	if len(idlers.ids) == 0 {
  1144  		return
  1145  	}
  1146  	sort.Sort(sort.Reverse(idlers))
  1147  
  1148  	// Iterate over all the tasks and try to find a pending one
  1149  	for _, task := range s.tasks {
  1150  		// Skip any tasks not in the storage retrieval phase
  1151  		if task.res == nil {
  1152  			continue
  1153  		}
  1154  		// Skip tasks that are already retrieving (or done with) all small states
  1155  		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
  1156  			continue
  1157  		}
  1158  		// Task pending retrieval, try to find an idle peer. If no such peer
  1159  		// exists, we probably assigned tasks for all (or they are stateless).
  1160  		// Abort the entire assignment mechanism.
  1161  		if len(idlers.ids) == 0 {
  1162  			return
  1163  		}
  1164  		var (
  1165  			idle = idlers.ids[0]
  1166  			peer = s.peers[idle]
  1167  			cap  = idlers.caps[0]
  1168  		)
  1169  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1170  
  1171  		// Matched a pending task to an idle peer, allocate a unique request id
  1172  		var reqid uint64
  1173  		for {
  1174  			reqid = uint64(rand.Int63())
  1175  			if reqid == 0 {
  1176  				continue
  1177  			}
  1178  			if _, ok := s.storageReqs[reqid]; ok {
  1179  				continue
  1180  			}
  1181  			break
  1182  		}
  1183  		// Generate the network query and send it to the peer. If there are
  1184  		// large contract tasks pending, complete those before diving into
  1185  		// even more new contracts.
  1186  		if cap > maxRequestSize {
  1187  			cap = maxRequestSize
  1188  		}
  1189  		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
  1190  			cap = minRequestSize
  1191  		}
  1192  		storageSets := cap / 1024
  1193  
  1194  		var (
  1195  			accounts = make([]common.Hash, 0, storageSets)
  1196  			roots    = make([]common.Hash, 0, storageSets)
  1197  			subtask  *storageTask
  1198  		)
  1199  		for account, subtasks := range task.SubTasks {
  1200  			for _, st := range subtasks {
  1201  				// Skip any subtasks already filling
  1202  				if st.req != nil {
  1203  					continue
  1204  				}
  1205  				// Found an incomplete storage chunk, schedule it
  1206  				accounts = append(accounts, account)
  1207  				roots = append(roots, st.root)
  1208  				subtask = st
  1209  				break // Large contract chunks are downloaded individually
  1210  			}
  1211  			if subtask != nil {
  1212  				break // Large contract chunks are downloaded individually
  1213  			}
  1214  		}
  1215  		if subtask == nil {
  1216  			// No large contract requires retrieval, but small ones are available
  1217  			for account, root := range task.stateTasks {
  1218  				delete(task.stateTasks, account)
  1219  
  1220  				accounts = append(accounts, account)
  1221  				roots = append(roots, root)
  1222  
  1223  				if len(accounts) >= storageSets {
  1224  					break
  1225  				}
  1226  			}
  1227  		}
  1228  		// If nothing was found, it means this task is actually already fully
  1229  		// retrieving, but large contracts are hard to detect. Skip to the next.
  1230  		if len(accounts) == 0 {
  1231  			continue
  1232  		}
  1233  		req := &storageRequest{
  1234  			peer:     idle,
  1235  			id:       reqid,
  1236  			time:     time.Now(),
  1237  			deliver:  success,
  1238  			revert:   fail,
  1239  			cancel:   cancel,
  1240  			stale:    make(chan struct{}),
  1241  			accounts: accounts,
  1242  			roots:    roots,
  1243  			mainTask: task,
  1244  			subTask:  subtask,
  1245  		}
  1246  		if subtask != nil {
  1247  			req.origin = subtask.Next
  1248  			req.limit = subtask.Last
  1249  		}
  1250  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1251  			peer.Log().Debug("Storage request timed out", "reqid", reqid)
  1252  			s.rates.Update(idle, StorageRangesMsg, 0, 0)
  1253  			s.scheduleRevertStorageRequest(req)
  1254  		})
  1255  		s.storageReqs[reqid] = req
  1256  		delete(s.storageIdlers, idle)
  1257  
  1258  		s.pend.Add(1)
  1259  		go func(root common.Hash) {
  1260  			defer s.pend.Done()
  1261  
  1262  			// Attempt to send the remote request and revert if it fails
  1263  			var origin, limit []byte
  1264  			if subtask != nil {
  1265  				origin, limit = req.origin[:], req.limit[:]
  1266  			}
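        			// For small contracts origin/limit stay nil, requesting each
        			// account's storage from the beginning of its range.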
  1267  			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
  1268  				log.Debug("Failed to request storage", "err", err)
  1269  				s.scheduleRevertStorageRequest(req)
  1270  			}
  1271  		}(s.root)
  1272  
  1273  		// Inject the request into the subtask to block further assignments
  1274  		if subtask != nil {
  1275  			subtask.req = req
  1276  		}
  1277  	}
  1278  }
  1279  
  1280  // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
  1281  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1282  func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
  1283  	s.lock.Lock()
  1284  	defer s.lock.Unlock()
  1285  
  1286  	// Sort the peers by download capacity to use faster ones if many available
  1287  	idlers := &capacitySort{
  1288  		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
  1289  		caps: make([]int, 0, len(s.trienodeHealIdlers)),
  1290  	}
  1291  	targetTTL := s.rates.TargetTimeout()
  1292  	for id := range s.trienodeHealIdlers {
  1293  		if _, ok := s.statelessPeers[id]; ok {
  1294  			continue
  1295  		}
  1296  		idlers.ids = append(idlers.ids, id)
  1297  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
  1298  	}
  1299  	if len(idlers.ids) == 0 {
  1300  		return
  1301  	}
  1302  	sort.Sort(sort.Reverse(idlers))
  1303  
  1304  	// Iterate over pending tasks and try to find a peer to retrieve with
  1305  	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1306  		// If there are not enough trie tasks queued to fully assign, fill the
  1307  		// queue from the state sync scheduler. The trie synced schedules these
  1308  		// queue from the state sync scheduler. The trie sync schedules these
  1309  		var (
  1310  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1311  			want = maxTrieRequestCount + maxCodeRequestCount
  1312  		)
  1313  		if have < want {
  1314  			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
  1315  			for i, path := range paths {
  1316  				s.healer.trieTasks[path] = hashes[i]
  1317  			}
  1318  			for _, hash := range codes {
  1319  				s.healer.codeTasks[hash] = struct{}{}
  1320  			}
  1321  		}
  1322  		// If all the heal tasks are bytecodes or already downloading, bail
  1323  		if len(s.healer.trieTasks) == 0 {
  1324  			return
  1325  		}
  1326  		// Task pending retrieval, try to find an idle peer. If no such peer
  1327  		// exists, we probably assigned tasks for all (or they are stateless).
  1328  		// Abort the entire assignment mechanism.
  1329  		if len(idlers.ids) == 0 {
  1330  			return
  1331  		}
  1332  		var (
  1333  			idle = idlers.ids[0]
  1334  			peer = s.peers[idle]
  1335  			cap  = idlers.caps[0]
  1336  		)
  1337  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1338  
  1339  		// Matched a pending task to an idle peer, allocate a unique request id
  1340  		var reqid uint64
  1341  		for {
  1342  			reqid = uint64(rand.Int63())
  1343  			if reqid == 0 {
  1344  				continue
  1345  			}
  1346  			if _, ok := s.trienodeHealReqs[reqid]; ok {
  1347  				continue
  1348  			}
  1349  			break
  1350  		}
  1351  		// Generate the network query and send it to the peer
  1352  		if cap > maxTrieRequestCount {
  1353  			cap = maxTrieRequestCount
  1354  		}
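        		// Scale the request down by the heal throttle so network retrieval does
        		// not outpace local trie node processing (the throttle is adjusted
        		// periodically in processTrienodeHealResponse).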
  1355  		cap = int(float64(cap) / s.trienodeHealThrottle)
  1356  		if cap <= 0 {
  1357  			cap = 1
  1358  		}
  1359  		var (
  1360  			hashes   = make([]common.Hash, 0, cap)
  1361  			paths    = make([]string, 0, cap)
  1362  			pathsets = make([]TrieNodePathSet, 0, cap)
  1363  		)
  1364  		for path, hash := range s.healer.trieTasks {
  1365  			delete(s.healer.trieTasks, path)
  1366  
  1367  			paths = append(paths, path)
  1368  			hashes = append(hashes, hash)
  1369  			if len(paths) >= cap {
  1370  				break
  1371  			}
  1372  		}
  1373  		// Group requests by account hash
  1374  		paths, hashes, _, pathsets = sortByAccountPath(paths, hashes)
  1375  		req := &trienodeHealRequest{
  1376  			peer:    idle,
  1377  			id:      reqid,
  1378  			time:    time.Now(),
  1379  			deliver: success,
  1380  			revert:  fail,
  1381  			cancel:  cancel,
  1382  			stale:   make(chan struct{}),
  1383  			paths:   paths,
  1384  			hashes:  hashes,
  1385  			task:    s.healer,
  1386  		}
  1387  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1388  			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
  1389  			s.rates.Update(idle, TrieNodesMsg, 0, 0)
  1390  			s.scheduleRevertTrienodeHealRequest(req)
  1391  		})
  1392  		s.trienodeHealReqs[reqid] = req
  1393  		delete(s.trienodeHealIdlers, idle)
  1394  
  1395  		s.pend.Add(1)
  1396  		go func(root common.Hash) {
  1397  			defer s.pend.Done()
  1398  
  1399  			// Attempt to send the remote request and revert if it fails
  1400  			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
  1401  				log.Debug("Failed to request trienode healers", "err", err)
  1402  				s.scheduleRevertTrienodeHealRequest(req)
  1403  			}
  1404  		}(s.root)
  1405  	}
  1406  }
  1407  
  1408  // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
  1409  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1410  func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
  1411  	s.lock.Lock()
  1412  	defer s.lock.Unlock()
  1413  
  1414  	// Sort the peers by download capacity to use faster ones if many available
  1415  	idlers := &capacitySort{
  1416  		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
  1417  		caps: make([]int, 0, len(s.bytecodeHealIdlers)),
  1418  	}
  1419  	targetTTL := s.rates.TargetTimeout()
  1420  	for id := range s.bytecodeHealIdlers {
  1421  		if _, ok := s.statelessPeers[id]; ok {
  1422  			continue
  1423  		}
  1424  		idlers.ids = append(idlers.ids, id)
  1425  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1426  	}
  1427  	if len(idlers.ids) == 0 {
  1428  		return
  1429  	}
  1430  	sort.Sort(sort.Reverse(idlers))
  1431  
  1432  	// Iterate over pending tasks and try to find a peer to retrieve with
  1433  	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1434  		// If there are not enough trie tasks queued to fully assign, fill the
  1435  		// queue from the state sync scheduler. The trie sync schedules these
  1436  		// together with trie nodes, so we need to queue them combined.
  1437  		var (
  1438  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1439  			want = maxTrieRequestCount + maxCodeRequestCount
  1440  		)
  1441  		if have < want {
  1442  			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
  1443  			for i, path := range paths {
  1444  				s.healer.trieTasks[path] = hashes[i]
  1445  			}
  1446  			for _, hash := range codes {
  1447  				s.healer.codeTasks[hash] = struct{}{}
  1448  			}
  1449  		}
  1450  		// If all the heal tasks are trienodes or already downloading, bail
  1451  		if len(s.healer.codeTasks) == 0 {
  1452  			return
  1453  		}
  1454  		// Task pending retrieval, try to find an idle peer. If no such peer
  1455  		// exists, we probably assigned tasks for all (or they are stateless).
  1456  		// Abort the entire assignment mechanism.
  1457  		if len(idlers.ids) == 0 {
  1458  			return
  1459  		}
  1460  		var (
  1461  			idle = idlers.ids[0]
  1462  			peer = s.peers[idle]
  1463  			cap  = idlers.caps[0]
  1464  		)
  1465  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1466  
  1467  		// Matched a pending task to an idle peer, allocate a unique request id
  1468  		var reqid uint64
  1469  		for {
  1470  			reqid = uint64(rand.Int63())
  1471  			if reqid == 0 {
  1472  				continue
  1473  			}
  1474  			if _, ok := s.bytecodeHealReqs[reqid]; ok {
  1475  				continue
  1476  			}
  1477  			break
  1478  		}
  1479  		// Generate the network query and send it to the peer
  1480  		if cap > maxCodeRequestCount {
  1481  			cap = maxCodeRequestCount
  1482  		}
  1483  		hashes := make([]common.Hash, 0, cap)
  1484  		for hash := range s.healer.codeTasks {
  1485  			delete(s.healer.codeTasks, hash)
  1486  
  1487  			hashes = append(hashes, hash)
  1488  			if len(hashes) >= cap {
  1489  				break
  1490  			}
  1491  		}
  1492  		req := &bytecodeHealRequest{
  1493  			peer:    idle,
  1494  			id:      reqid,
  1495  			time:    time.Now(),
  1496  			deliver: success,
  1497  			revert:  fail,
  1498  			cancel:  cancel,
  1499  			stale:   make(chan struct{}),
  1500  			hashes:  hashes,
  1501  			task:    s.healer,
  1502  		}
  1503  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1504  			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
  1505  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1506  			s.scheduleRevertBytecodeHealRequest(req)
  1507  		})
  1508  		s.bytecodeHealReqs[reqid] = req
  1509  		delete(s.bytecodeHealIdlers, idle)
  1510  
  1511  		s.pend.Add(1)
  1512  		go func() {
  1513  			defer s.pend.Done()
  1514  
  1515  			// Attempt to send the remote request and revert if it fails
  1516  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1517  				log.Debug("Failed to request bytecode healers", "err", err)
  1518  				s.scheduleRevertBytecodeHealRequest(req)
  1519  			}
  1520  		}()
  1521  	}
  1522  }
  1523  
  1524  // revertRequests locates all the currently pending requests from a particular
  1525  // peer and reverts them, rescheduling for others to fulfill.
  1526  func (s *Syncer) revertRequests(peer string) {
  1527  	// Gather the requests first, reverting them needs the lock too
  1528  	s.lock.Lock()
  1529  	var accountReqs []*accountRequest
  1530  	for _, req := range s.accountReqs {
  1531  		if req.peer == peer {
  1532  			accountReqs = append(accountReqs, req)
  1533  		}
  1534  	}
  1535  	var bytecodeReqs []*bytecodeRequest
  1536  	for _, req := range s.bytecodeReqs {
  1537  		if req.peer == peer {
  1538  			bytecodeReqs = append(bytecodeReqs, req)
  1539  		}
  1540  	}
  1541  	var storageReqs []*storageRequest
  1542  	for _, req := range s.storageReqs {
  1543  		if req.peer == peer {
  1544  			storageReqs = append(storageReqs, req)
  1545  		}
  1546  	}
  1547  	var trienodeHealReqs []*trienodeHealRequest
  1548  	for _, req := range s.trienodeHealReqs {
  1549  		if req.peer == peer {
  1550  			trienodeHealReqs = append(trienodeHealReqs, req)
  1551  		}
  1552  	}
  1553  	var bytecodeHealReqs []*bytecodeHealRequest
  1554  	for _, req := range s.bytecodeHealReqs {
  1555  		if req.peer == peer {
  1556  			bytecodeHealReqs = append(bytecodeHealReqs, req)
  1557  		}
  1558  	}
  1559  	s.lock.Unlock()
  1560  
  1561  	// Revert all the requests matching the peer
  1562  	for _, req := range accountReqs {
  1563  		s.revertAccountRequest(req)
  1564  	}
  1565  	for _, req := range bytecodeReqs {
  1566  		s.revertBytecodeRequest(req)
  1567  	}
  1568  	for _, req := range storageReqs {
  1569  		s.revertStorageRequest(req)
  1570  	}
  1571  	for _, req := range trienodeHealReqs {
  1572  		s.revertTrienodeHealRequest(req)
  1573  	}
  1574  	for _, req := range bytecodeHealReqs {
  1575  		s.revertBytecodeHealRequest(req)
  1576  	}
  1577  }
  1578  
  1579  // scheduleRevertAccountRequest asks the event loop to clean up an account range
  1580  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1581  func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
  1582  	select {
  1583  	case req.revert <- req:
  1584  		// Sync event loop notified
  1585  	case <-req.cancel:
  1586  		// Sync cycle got cancelled
  1587  	case <-req.stale:
  1588  		// Request already reverted
  1589  	}
  1590  }
  1591  
  1592  // revertAccountRequest cleans up an account range request and returns all failed
  1593  // retrieval tasks to the scheduler for reassignment.
  1594  //
  1595  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1596  // On peer threads, use scheduleRevertAccountRequest.
  1597  func (s *Syncer) revertAccountRequest(req *accountRequest) {
  1598  	log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
  1599  	select {
  1600  	case <-req.stale:
  1601  		log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
  1602  		return
  1603  	default:
  1604  	}
  1605  	close(req.stale)
  1606  
  1607  	// Remove the request from the tracked set
  1608  	s.lock.Lock()
  1609  	delete(s.accountReqs, req.id)
  1610  	s.lock.Unlock()
  1611  
  1612  	// If there's a timeout timer still running, abort it and mark the account
  1613  	// task as not-pending, ready for rescheduling
  1614  	req.timeout.Stop()
  1615  	if req.task.req == req {
  1616  		req.task.req = nil
  1617  	}
  1618  }
  1619  
  1620  // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
  1621  // and return all failed retrieval tasks to the scheduler for reassignment.
  1622  func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
  1623  	select {
  1624  	case req.revert <- req:
  1625  		// Sync event loop notified
  1626  	case <-req.cancel:
  1627  		// Sync cycle got cancelled
  1628  	case <-req.stale:
  1629  		// Request already reverted
  1630  	}
  1631  }
  1632  
  1633  // revertBytecodeRequest cleans up a bytecode request and returns all failed
  1634  // retrieval tasks to the scheduler for reassignment.
  1635  //
  1636  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1637  // On peer threads, use scheduleRevertBytecodeRequest.
  1638  func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
  1639  	log.Debug("Reverting bytecode request", "peer", req.peer)
  1640  	select {
  1641  	case <-req.stale:
  1642  		log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id)
  1643  		return
  1644  	default:
  1645  	}
  1646  	close(req.stale)
  1647  
  1648  	// Remove the request from the tracked set
  1649  	s.lock.Lock()
  1650  	delete(s.bytecodeReqs, req.id)
  1651  	s.lock.Unlock()
  1652  
  1653  	// If there's a timeout timer still running, abort it and mark the code
  1654  	// retrievals as not-pending, ready for rescheduling
  1655  	req.timeout.Stop()
  1656  	for _, hash := range req.hashes {
  1657  		req.task.codeTasks[hash] = struct{}{}
  1658  	}
  1659  }
  1660  
  1661  // scheduleRevertStorageRequest asks the event loop to clean up a storage range
  1662  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1663  func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
  1664  	select {
  1665  	case req.revert <- req:
  1666  		// Sync event loop notified
  1667  	case <-req.cancel:
  1668  		// Sync cycle got cancelled
  1669  	case <-req.stale:
  1670  		// Request already reverted
  1671  	}
  1672  }
  1673  
  1674  // revertStorageRequest cleans up a storage range request and returns all failed
  1675  // retrieval tasks to the scheduler for reassignment.
  1676  //
  1677  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1678  // On peer threads, use scheduleRevertStorageRequest.
  1679  func (s *Syncer) revertStorageRequest(req *storageRequest) {
  1680  	log.Debug("Reverting storage request", "peer", req.peer)
  1681  	select {
  1682  	case <-req.stale:
  1683  		log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id)
  1684  		return
  1685  	default:
  1686  	}
  1687  	close(req.stale)
  1688  
  1689  	// Remove the request from the tracked set
  1690  	s.lock.Lock()
  1691  	delete(s.storageReqs, req.id)
  1692  	s.lock.Unlock()
  1693  
  1694  	// If there's a timeout timer still running, abort it and mark the storage
  1695  	// task as not-pending, ready for rescheduling
  1696  	req.timeout.Stop()
  1697  	if req.subTask != nil {
  1698  		req.subTask.req = nil
  1699  	} else {
  1700  		for i, account := range req.accounts {
  1701  			req.mainTask.stateTasks[account] = req.roots[i]
  1702  		}
  1703  	}
  1704  }
  1705  
  1706  // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal
  1707  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1708  func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
  1709  	select {
  1710  	case req.revert <- req:
  1711  		// Sync event loop notified
  1712  	case <-req.cancel:
  1713  		// Sync cycle got cancelled
  1714  	case <-req.stale:
  1715  		// Request already reverted
  1716  	}
  1717  }
  1718  
  1719  // revertTrienodeHealRequest cleans up a trienode heal request and returns all
  1720  // failed retrieval tasks to the scheduler for reassignment.
  1721  //
  1722  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1723  // On peer threads, use scheduleRevertTrienodeHealRequest.
  1724  func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
  1725  	log.Debug("Reverting trienode heal request", "peer", req.peer)
  1726  	select {
  1727  	case <-req.stale:
  1728  		log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1729  		return
  1730  	default:
  1731  	}
  1732  	close(req.stale)
  1733  
  1734  	// Remove the request from the tracked set
  1735  	s.lock.Lock()
  1736  	delete(s.trienodeHealReqs, req.id)
  1737  	s.lock.Unlock()
  1738  
  1739  	// If there's a timeout timer still running, abort it and mark the trie node
  1740  	// retrievals as not-pending, ready for rescheduling
  1741  	req.timeout.Stop()
  1742  	for i, path := range req.paths {
  1743  		req.task.trieTasks[path] = req.hashes[i]
  1744  	}
  1745  }
  1746  
  1747  // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
  1748  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1749  func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
  1750  	select {
  1751  	case req.revert <- req:
  1752  		// Sync event loop notified
  1753  	case <-req.cancel:
  1754  		// Sync cycle got cancelled
  1755  	case <-req.stale:
  1756  		// Request already reverted
  1757  	}
  1758  }
  1759  
  1760  // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
  1761  // failed retrieval tasks to the scheduler for reassignment.
  1762  //
  1763  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1764  // On peer threads, use scheduleRevertBytecodeHealRequest.
  1765  func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
  1766  	log.Debug("Reverting bytecode heal request", "peer", req.peer)
  1767  	select {
  1768  	case <-req.stale:
  1769  		log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1770  		return
  1771  	default:
  1772  	}
  1773  	close(req.stale)
  1774  
  1775  	// Remove the request from the tracked set
  1776  	s.lock.Lock()
  1777  	delete(s.bytecodeHealReqs, req.id)
  1778  	s.lock.Unlock()
  1779  
  1780  	// If there's a timeout timer still running, abort it and mark the code
  1781  	// retrievals as not-pending, ready for rescheduling
  1782  	req.timeout.Stop()
  1783  	for _, hash := range req.hashes {
  1784  		req.task.codeTasks[hash] = struct{}{}
  1785  	}
  1786  }
  1787  
  1788  // processAccountResponse integrates an already validated account range response
  1789  // into the account tasks.
  1790  func (s *Syncer) processAccountResponse(res *accountResponse) {
  1791  	// Switch the task from pending to filling
  1792  	res.task.req = nil
  1793  	res.task.res = res
  1794  
  1795  	// Ensure that the response doesn't overflow into the subsequent task
  1796  	last := res.task.Last.Big()
  1797  	for i, hash := range res.hashes {
  1798  		// Mark the range complete if the last element is already included.
  1799  		// Keep iterating to trim any extra states beyond it.
  1800  		cmp := hash.Big().Cmp(last)
  1801  		if cmp == 0 {
  1802  			res.cont = false
  1803  			continue
  1804  		}
  1805  		if cmp > 0 {
  1806  			// Chunk overflown, cut off excess
  1807  			res.hashes = res.hashes[:i]
  1808  			res.accounts = res.accounts[:i]
  1809  			res.cont = false // Mark range completed
  1810  			break
  1811  		}
  1812  	}
  1813  	// Iterate over all the accounts and assemble which ones need further sub-
  1814  	// filling before the entire account range can be persisted.
  1815  	res.task.needCode = make([]bool, len(res.accounts))
  1816  	res.task.needState = make([]bool, len(res.accounts))
  1817  	res.task.needHeal = make([]bool, len(res.accounts))
  1818  
  1819  	res.task.codeTasks = make(map[common.Hash]struct{})
  1820  	res.task.stateTasks = make(map[common.Hash]common.Hash)
  1821  
  1822  	resumed := make(map[common.Hash]struct{})
  1823  
  1824  	res.task.pend = 0
  1825  	for i, account := range res.accounts {
  1826  		// Check if the account is a contract with an unknown code
  1827  		if !bytes.Equal(account.CodeHash, emptyCode[:]) {
  1828  			if !rawdb.HasCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)) {
  1829  				res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{}
  1830  				res.task.needCode[i] = true
  1831  				res.task.pend++
  1832  			}
  1833  		}
  1834  		// Check if the account is a contract with an unknown storage trie
  1835  		if account.Root != emptyRoot {
  1836  			if ok, err := s.db.Has(account.Root[:]); err != nil || !ok {
  1837  				// If there was a previous large state retrieval in progress,
  1838  				// don't restart it from scratch. This happens if a sync cycle
  1839  				// is interrupted and resumed later. However, *do* update the
  1840  				// previous root hash.
  1841  				if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
  1842  					log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root)
  1843  					for _, subtask := range subtasks {
  1844  						subtask.root = account.Root
  1845  					}
  1846  					res.task.needHeal[i] = true
  1847  					resumed[res.hashes[i]] = struct{}{}
  1848  				} else {
  1849  					res.task.stateTasks[res.hashes[i]] = account.Root
  1850  				}
  1851  				res.task.needState[i] = true
  1852  				res.task.pend++
  1853  			}
  1854  		}
  1855  	}
  1856  	// Delete any subtasks that have been aborted but not resumed. This may undo
  1857  	// some progress if a new peer gives us fewer accounts than an old one, but for
  1858  	// now we have to live with that.
  1859  	for hash := range res.task.SubTasks {
  1860  		if _, ok := resumed[hash]; !ok {
  1861  			log.Debug("Aborting suspended storage retrieval", "account", hash)
  1862  			delete(res.task.SubTasks, hash)
  1863  		}
  1864  	}
  1865  	// If the account range contained no contracts, or all have been fully filled
  1866  	// beforehand, short circuit storage filling and forward to the next task
  1867  	if res.task.pend == 0 {
  1868  		s.forwardAccountTask(res.task)
  1869  		return
  1870  	}
  1871  	// Some accounts are incomplete, leave as is for the storage and contract
  1872  	// task assigners to pick up and fill.
  1873  }
  1874  
  1875  // processBytecodeResponse integrates an already validated bytecode response
  1876  // into the account tasks.
  1877  func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
  1878  	batch := s.db.NewBatch()
  1879  
  1880  	var (
  1881  		codes uint64
  1882  	)
  1883  	for i, hash := range res.hashes {
  1884  		code := res.codes[i]
  1885  
  1886  		// If the bytecode was not delivered, reschedule it
  1887  		if code == nil {
  1888  			res.task.codeTasks[hash] = struct{}{}
  1889  			continue
  1890  		}
  1891  		// Code was delivered, mark it not needed any more
  1892  		for j, account := range res.task.res.accounts {
  1893  			if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) {
  1894  				res.task.needCode[j] = false
  1895  				res.task.pend--
  1896  			}
  1897  		}
  1898  		// Push the bytecode into a database batch
  1899  		codes++
  1900  		rawdb.WriteCode(batch, hash, code)
  1901  	}
  1902  	bytes := common.StorageSize(batch.ValueSize())
  1903  	if err := batch.Write(); err != nil {
  1904  		log.Crit("Failed to persist bytecodes", "err", err)
  1905  	}
  1906  	s.bytecodeSynced += codes
  1907  	s.bytecodeBytes += bytes
  1908  
  1909  	log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
  1910  
  1911  	// If this delivery completed the last pending task, forward the account task
  1912  	// to the next chunk
  1913  	if res.task.pend == 0 {
  1914  		s.forwardAccountTask(res.task)
  1915  		return
  1916  	}
  1917  	// Some accounts are still incomplete, leave as is for the storage and contract
  1918  	// task assigners to pick up and fill.
  1919  }
  1920  
  1921  // processStorageResponse integrates an already validated storage response
  1922  // into the account tasks.
  1923  func (s *Syncer) processStorageResponse(res *storageResponse) {
  1924  	// Switch the subtask from pending to idle
  1925  	if res.subTask != nil {
  1926  		res.subTask.req = nil
  1927  	}
  1928  	batch := ethdb.HookedBatch{
  1929  		Batch: s.db.NewBatch(),
  1930  		OnPut: func(key []byte, value []byte) {
  1931  			s.storageBytes += common.StorageSize(len(key) + len(value))
  1932  		},
  1933  	}
  1934  	var (
  1935  		slots           int
  1936  		oldStorageBytes = s.storageBytes
  1937  	)
  1938  	// Iterate over all the accounts and reconstruct their storage tries from the
  1939  	// delivered slots
  1940  	for i, account := range res.accounts {
  1941  		// If the account was not delivered, reschedule it
  1942  		if i >= len(res.hashes) {
  1943  			res.mainTask.stateTasks[account] = res.roots[i]
  1944  			continue
  1945  		}
  1946  		// State was delivered; if complete, mark it as not needed any more, otherwise
  1947  		// mark the account as needing healing
  1948  		for j, hash := range res.mainTask.res.hashes {
  1949  			if account != hash {
  1950  				continue
  1951  			}
  1952  			acc := res.mainTask.res.accounts[j]
  1953  
  1954  			// If the packet contains multiple contract storage slots, all
  1955  			// but the last are surely complete. The last contract may be
  1956  			// chunked, so check its continuation flag.
  1957  			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
  1958  				res.mainTask.needState[j] = false
  1959  				res.mainTask.pend--
  1960  			}
  1961  			// If the last contract was chunked, mark it as needing healing
  1962  			// to avoid writing it out to disk prematurely.
  1963  			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
  1964  				res.mainTask.needHeal[j] = true
  1965  			}
  1966  			// If the last contract was chunked, we need to switch to large
  1967  			// contract handling mode
  1968  			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
  1969  				// If we haven't yet started a large-contract retrieval, create
  1970  				// the subtasks for it within the main account task
  1971  				if tasks, ok := res.mainTask.SubTasks[account]; !ok {
  1972  					var (
  1973  						keys    = res.hashes[i]
  1974  						chunks  = uint64(storageConcurrency)
  1975  						lastKey common.Hash
  1976  					)
  1977  					if len(keys) > 0 {
  1978  						lastKey = keys[len(keys)-1]
  1979  					}
  1980  					// If the number of slots remaining is low, decrease the
  1981  					// number of chunks. Somewhere on the order of 10-15K slots
  1982  					// fit into a packet of 500KB. A key/slot pair is maximum 64
  1983  					// bytes, so pessimistically maxRequestSize/64 = 8K.
  1984  					//
  1985  					// Chunk so that at least 2 packets are needed to fill a task.
  1986  					if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil {
  1987  						if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks {
  1988  							chunks = n + 1
  1989  						}
  1990  						log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks)
  1991  					} else {
  1992  						log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks)
  1993  					}
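        					// Split the hash space remaining after the last delivered key
        					// into the computed number of equally sized chunks.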
  1994  					r := newHashRange(lastKey, chunks)
  1995  
  1996  					// Our first task is the one that was just filled by this response.
  1997  					batch := ethdb.HookedBatch{
  1998  						Batch: s.db.NewBatch(),
  1999  						OnPut: func(key []byte, value []byte) {
  2000  							s.storageBytes += common.StorageSize(len(key) + len(value))
  2001  						},
  2002  					}
  2003  					tasks = append(tasks, &storageTask{
  2004  						Next:     common.Hash{},
  2005  						Last:     r.End(),
  2006  						root:     acc.Root,
  2007  						genBatch: batch,
  2008  						genTrie:  trie.NewStackTrieWithOwner(batch, account),
  2009  					})
  2010  					for r.Next() {
  2011  						batch := ethdb.HookedBatch{
  2012  							Batch: s.db.NewBatch(),
  2013  							OnPut: func(key []byte, value []byte) {
  2014  								s.storageBytes += common.StorageSize(len(key) + len(value))
  2015  							},
  2016  						}
  2017  						tasks = append(tasks, &storageTask{
  2018  							Next:     r.Start(),
  2019  							Last:     r.End(),
  2020  							root:     acc.Root,
  2021  							genBatch: batch,
  2022  							genTrie:  trie.NewStackTrieWithOwner(batch, account),
  2023  						})
  2024  					}
  2025  					for _, task := range tasks {
  2026  						log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", task.Next, "last", task.Last)
  2027  					}
  2028  					res.mainTask.SubTasks[account] = tasks
  2029  
  2030  					// Since we've just created the sub-tasks, this response
  2031  					// is surely for the first one (zero origin)
  2032  					res.subTask = tasks[0]
  2033  				}
  2034  			}
  2035  			// If we're in large contract delivery mode, forward the subtask
  2036  			if res.subTask != nil {
  2037  				// Ensure the response doesn't overflow into the subsequent task
  2038  				last := res.subTask.Last.Big()
  2039  				// Find the first overflowing key. While at it, mark res as complete
  2040  				// if the range includes or passes the 'last' key
  2041  				index := sort.Search(len(res.hashes[i]), func(k int) bool {
  2042  					cmp := res.hashes[i][k].Big().Cmp(last)
  2043  					if cmp >= 0 {
  2044  						res.cont = false
  2045  					}
  2046  					return cmp > 0
  2047  				})
  2048  				if index >= 0 {
  2049  					// cut off excess
  2050  					res.hashes[i] = res.hashes[i][:index]
  2051  					res.slots[i] = res.slots[i][:index]
  2052  				}
  2053  				// Forward the relevant storage chunk (even if created just now)
  2054  				if res.cont {
  2055  					res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1])
  2056  				} else {
  2057  					res.subTask.done = true
  2058  				}
  2059  			}
  2060  		}
  2061  		// Iterate over all the complete contracts, reconstruct the trie nodes and
  2062  		// push them to disk. If the contract is chunked, the trie nodes will be
  2063  		// reconstructed later.
  2064  		slots += len(res.hashes[i])
  2065  
  2066  		if i < len(res.hashes)-1 || res.subTask == nil {
  2067  			tr := trie.NewStackTrieWithOwner(batch, account)
  2068  			for j := 0; j < len(res.hashes[i]); j++ {
  2069  				tr.Update(res.hashes[i][j][:], res.slots[i][j])
  2070  			}
  2071  			tr.Commit()
  2072  		}
  2073  		// Persist the received storage segments. These flat states may be
  2074  		// outdated during the sync, but they can be fixed later during the
  2075  		// snapshot generation.
  2076  		for j := 0; j < len(res.hashes[i]); j++ {
  2077  			rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j])
  2078  
  2079  			// If we're storing large contracts, generate the trie nodes
  2080  			// on the fly to not trash the gluing points
  2081  			if i == len(res.hashes)-1 && res.subTask != nil {
  2082  				res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
  2083  			}
  2084  		}
  2085  	}
  2086  	// Large contracts could have generated new trie nodes, flush them to disk
  2087  	if res.subTask != nil {
  2088  		if res.subTask.done {
  2089  			if root, err := res.subTask.genTrie.Commit(); err != nil {
  2090  				log.Error("Failed to commit stack slots", "err", err)
  2091  			} else if root == res.subTask.root {
  2092  				// If the chunk's root is an overflown but full delivery, clear the heal request
  2093  				for i, account := range res.mainTask.res.hashes {
  2094  					if account == res.accounts[len(res.accounts)-1] {
  2095  						res.mainTask.needHeal[i] = false
  2096  					}
  2097  				}
  2098  			}
  2099  		}
  2100  		if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || res.subTask.done {
  2101  			if err := res.subTask.genBatch.Write(); err != nil {
  2102  				log.Error("Failed to persist stack slots", "err", err)
  2103  			}
  2104  			res.subTask.genBatch.Reset()
  2105  		}
  2106  	}
  2107  	// Flush anything written just now and update the stats
  2108  	if err := batch.Write(); err != nil {
  2109  		log.Crit("Failed to persist storage slots", "err", err)
  2110  	}
  2111  	s.storageSynced += uint64(slots)
  2112  
  2113  	log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes)
  2114  
  2115  	// If this delivery completed the last pending task, forward the account task
  2116  	// to the next chunk
  2117  	if res.mainTask.pend == 0 {
  2118  		s.forwardAccountTask(res.mainTask)
  2119  		return
  2120  	}
  2121  	// Some accounts are still incomplete, leave as is for the storage and contract
  2122  	// task assigners to pick up and fill.
  2123  }
  2124  
  2125  // processTrienodeHealResponse integrates an already validated trienode response
  2126  // into the healer tasks.
  2127  func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
  2128  	var (
  2129  		start = time.Now()
  2130  		fills int
  2131  	)
  2132  	for i, hash := range res.hashes {
  2133  		node := res.nodes[i]
  2134  
  2135  		// If the trie node was not delivered, reschedule it
  2136  		if node == nil {
  2137  			res.task.trieTasks[res.paths[i]] = res.hashes[i]
  2138  			continue
  2139  		}
  2140  		fills++
  2141  
  2142  		// Push the trie node into the state syncer
  2143  		s.trienodeHealSynced++
  2144  		s.trienodeHealBytes += common.StorageSize(len(node))
  2145  
  2146  		err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node})
  2147  		switch err {
  2148  		case nil:
  2149  		case trie.ErrAlreadyProcessed:
  2150  			s.trienodeHealDups++
  2151  		case trie.ErrNotRequested:
  2152  			s.trienodeHealNops++
  2153  		default:
  2154  			log.Error("Invalid trienode processed", "hash", hash, "err", err)
  2155  		}
  2156  	}
  2157  	batch := s.db.NewBatch()
  2158  	if err := s.healer.scheduler.Commit(batch); err != nil {
  2159  		log.Error("Failed to commit healing data", "err", err)
  2160  	}
  2161  	if err := batch.Write(); err != nil {
  2162  		log.Crit("Failed to persist healing data", "err", err)
  2163  	}
  2164  	log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
  2165  
  2166  	// Calculate the processing rate of one filled trie node
  2167  	rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second))
  2168  
  2169  	// Update the currently measured trienode queueing and processing throughput.
  2170  	//
  2171  	// The processing rate needs to be updated uniformly, independent of whether
  2172  	// we've processed 1x100 trie nodes or 100x1, to keep the rate consistent even in
  2173  	// the face of varying network packets. As such, we cannot just measure the
  2174  	// time it took to process N trie nodes and update once, we need one update
  2175  	// per trie node.
  2176  	//
  2177  	// Naively, that would be:
  2178  	//
  2179  	//   for i:=0; i<fills; i++ {
  2180  	//     healRate = (1-measurementImpact)*oldRate + measurementImpact*newRate
  2181  	//   }
  2182  	//
  2183  	// Essentially, a recursive expansion of HR = (1-MI)*HR + MI*NR.
  2184  	//
  2185  	// We can expand that formula for the Nth item as:
  2186  	//   HR(N) = (1-MI)^N*OR + (1-MI)^(N-1)*MI*NR + (1-MI)^(N-2)*MI*NR + ... + (1-MI)^0*MI*NR
  2187  	//
  2188  	// The above is a geometric sequence that can be summed to:
  2189  	//   HR(N) = (1-MI)^N*(OR-NR) + NR
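        	// As a rough illustration only: with a measurement impact of 0.005 and
        	// 100 fills, the old rate retains a weight of (1-0.005)^100 ≈ 0.61 after
        	// a single update.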
  2190  	s.trienodeHealRate = gomath.Pow(1-trienodeHealRateMeasurementImpact, float64(fills))*(s.trienodeHealRate-rate) + rate
  2191  
  2192  	pending := atomic.LoadUint64(&s.trienodeHealPend)
  2193  	if time.Since(s.trienodeHealThrottled) > time.Second {
  2194  		// Periodically adjust the trie node throttler
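        		// If more trie nodes are pending than roughly twice what can be
        		// processed per second, slow retrieval down; otherwise speed it up.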
  2195  		if float64(pending) > 2*s.trienodeHealRate {
  2196  			s.trienodeHealThrottle *= trienodeHealThrottleIncrease
  2197  		} else {
  2198  			s.trienodeHealThrottle /= trienodeHealThrottleDecrease
  2199  		}
  2200  		if s.trienodeHealThrottle > maxTrienodeHealThrottle {
  2201  			s.trienodeHealThrottle = maxTrienodeHealThrottle
  2202  		} else if s.trienodeHealThrottle < minTrienodeHealThrottle {
  2203  			s.trienodeHealThrottle = minTrienodeHealThrottle
  2204  		}
  2205  		s.trienodeHealThrottled = time.Now()
  2206  
  2207  		log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle)
  2208  	}
  2209  }
  2210  
  2211  // processBytecodeHealResponse integrates an already validated bytecode response
  2212  // into the healer tasks.
  2213  func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
  2214  	for i, hash := range res.hashes {
  2215  		node := res.codes[i]
  2216  
  2217  		// If the trie node was not delivered, reschedule it
  2218  		if node == nil {
  2219  			res.task.codeTasks[hash] = struct{}{}
  2220  			continue
  2221  		}
  2222  		// Push the trie node into the state syncer
  2223  		s.bytecodeHealSynced++
  2224  		s.bytecodeHealBytes += common.StorageSize(len(node))
  2225  
  2226  		err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node})
  2227  		switch err {
  2228  		case nil:
  2229  		case trie.ErrAlreadyProcessed:
  2230  			s.bytecodeHealDups++
  2231  		case trie.ErrNotRequested:
  2232  			s.bytecodeHealNops++
  2233  		default:
  2234  			log.Error("Invalid bytecode processed", "hash", hash, "err", err)
  2235  		}
  2236  	}
  2237  	batch := s.db.NewBatch()
  2238  	if err := s.healer.scheduler.Commit(batch); err != nil {
  2239  		log.Error("Failed to commit healing data", "err", err)
  2240  	}
  2241  	if err := batch.Write(); err != nil {
  2242  		log.Crit("Failed to persist healing data", "err", err)
  2243  	}
  2244  	log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
  2245  }
  2246  
  2247  // forwardAccountTask takes a filled account task and persists anything available
  2248  // into the database, after which it forwards the next account marker so that the
  2249  // task's next chunk may be filled.
  2250  func (s *Syncer) forwardAccountTask(task *accountTask) {
  2251  	// Remove any pending delivery
  2252  	res := task.res
  2253  	if res == nil {
  2254  		return // nothing to forward
  2255  	}
  2256  	task.res = nil
  2257  
  2258  	// Persist the received account segments. These flat states may be
  2259  	// outdated during the sync, but they can be fixed later during the
  2260  	// snapshot generation.
  2261  	oldAccountBytes := s.accountBytes
  2262  
  2263  	batch := ethdb.HookedBatch{
  2264  		Batch: s.db.NewBatch(),
  2265  		OnPut: func(key []byte, value []byte) {
  2266  			s.accountBytes += common.StorageSize(len(key) + len(value))
  2267  		},
  2268  	}
  2269  	for i, hash := range res.hashes {
  2270  		if task.needCode[i] || task.needState[i] {
  2271  			break
  2272  		}
  2273  		slim := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash)
  2274  		rawdb.WriteAccountSnapshot(batch, hash, slim)
  2275  
  2276  		// If the task is complete, drop it into the stack trie to generate
  2277  		// account trie nodes for it
  2278  		if !task.needHeal[i] {
  2279  			full, err := snapshot.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted
  2280  			if err != nil {
  2281  				panic(err) // Really shouldn't ever happen
  2282  			}
  2283  			task.genTrie.Update(hash[:], full)
  2284  		}
  2285  	}
  2286  	// Flush anything written just now and update the stats
  2287  	if err := batch.Write(); err != nil {
  2288  		log.Crit("Failed to persist accounts", "err", err)
  2289  	}
  2290  	s.accountSynced += uint64(len(res.accounts))
  2291  
  2292  	// Task filling persisted, push the chunk marker forward to the first
  2293  	// account still missing data.
  2294  	for i, hash := range res.hashes {
  2295  		if task.needCode[i] || task.needState[i] {
  2296  			return
  2297  		}
  2298  		task.Next = incHash(hash)
  2299  	}
  2300  	// All accounts marked as complete, track if the entire task is done
  2301  	task.done = !res.cont
  2302  
  2303  	// Stack trie could have generated trie nodes, push them to disk (we need to
  2304  	// flush after finalizing task.done); it's fine even if we crash and lose this
  2305  	// write, as it will only cause more data to be downloaded during heal.
  2306  	if task.done {
  2307  		if _, err := task.genTrie.Commit(); err != nil {
  2308  			log.Error("Failed to commit stack account", "err", err)
  2309  		}
  2310  	}
  2311  	if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
  2312  		if err := task.genBatch.Write(); err != nil {
  2313  			log.Error("Failed to persist stack account", "err", err)
  2314  		}
  2315  		task.genBatch.Reset()
  2316  	}
  2317  	log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes)
  2318  }
  2319  
  2320  // OnAccounts is a callback method to invoke when a range of accounts are
  2321  // received from a remote peer.
  2322  func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
  2323  	size := common.StorageSize(len(hashes) * common.HashLength)
  2324  	for _, account := range accounts {
  2325  		size += common.StorageSize(len(account))
  2326  	}
  2327  	for _, node := range proof {
  2328  		size += common.StorageSize(len(node))
  2329  	}
  2330  	logger := peer.Log().New("reqid", id)
  2331  	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)
  2332  
  2333  	// Whether or not the response is valid, we can mark the peer as idle and
  2334  	// notify the scheduler to assign a new task. If the response is invalid,
  2335  	// we'll drop the peer in a bit.
  2336  	defer func() {
  2337  		s.lock.Lock()
  2338  		defer s.lock.Unlock()
  2339  		if _, ok := s.peers[peer.ID()]; ok {
  2340  			s.accountIdlers[peer.ID()] = struct{}{}
  2341  		}
  2342  		select {
  2343  		case s.update <- struct{}{}:
  2344  		default:
  2345  		}
  2346  	}()
  2347  	s.lock.Lock()
  2348  	// Ensure the response is for a valid request
  2349  	req, ok := s.accountReqs[id]
  2350  	if !ok {
  2351  		// Request stale, perhaps the peer timed out but came through in the end
  2352  		logger.Warn("Unexpected account range packet")
  2353  		s.lock.Unlock()
  2354  		return nil
  2355  	}
  2356  	delete(s.accountReqs, id)
  2357  	s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
  2358  
  2359  	// Clean up the request timeout timer, we'll see how to proceed further based
  2360  	// on the actual delivered content
  2361  	if !req.timeout.Stop() {
  2362  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2363  		s.lock.Unlock()
  2364  		return nil
  2365  	}
  2366  	// Response is valid, but check if peer is signalling that it does not have
  2367  	// the requested data. For account range queries that means the state being
  2368  	// retrieved was either already pruned remotely, or the peer is not yet
  2369  	// synced to our head.
  2370  	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
  2371  		logger.Debug("Peer rejected account range request", "root", s.root)
  2372  		s.statelessPeers[peer.ID()] = struct{}{}
  2373  		s.lock.Unlock()
  2374  
  2375  		// Signal this request as failed, and ready for rescheduling
  2376  		s.scheduleRevertAccountRequest(req)
  2377  		return nil
  2378  	}
  2379  	root := s.root
  2380  	s.lock.Unlock()
  2381  
  2382  	// Reconstruct a partial trie from the response and verify it
  2383  	keys := make([][]byte, len(hashes))
  2384  	for i, key := range hashes {
  2385  		keys[i] = common.CopyBytes(key[:])
  2386  	}
  2387  	nodes := make(light.NodeList, len(proof))
  2388  	for i, node := range proof {
  2389  		nodes[i] = node
  2390  	}
  2391  	proofdb := nodes.NodeSet()
  2392  
  2393  	var end []byte
  2394  	if len(keys) > 0 {
  2395  		end = keys[len(keys)-1]
  2396  	}
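        	// cont reports whether more accounts exist beyond the last returned key,
        	// i.e. whether this task's range is not yet exhausted.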
  2397  	cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
  2398  	if err != nil {
  2399  		logger.Warn("Account range failed proof", "err", err)
  2400  		// Signal this request as failed, and ready for rescheduling
  2401  		s.scheduleRevertAccountRequest(req)
  2402  		return err
  2403  	}
  2404  	accs := make([]*types.StateAccount, len(accounts))
  2405  	for i, account := range accounts {
  2406  		acc := new(types.StateAccount)
  2407  		if err := rlp.DecodeBytes(account, acc); err != nil {
  2408  			panic(err) // We created these blobs, we must be able to decode them
  2409  		}
  2410  		accs[i] = acc
  2411  	}
  2412  	response := &accountResponse{
  2413  		task:     req.task,
  2414  		hashes:   hashes,
  2415  		accounts: accs,
  2416  		cont:     cont,
  2417  	}
  2418  	select {
  2419  	case req.deliver <- response:
  2420  	case <-req.cancel:
  2421  	case <-req.stale:
  2422  	}
  2423  	return nil
  2424  }
  2425  
  2426  // OnByteCodes is a callback method to invoke when a batch of contract
  2427  // bytecodes are received from a remote peer.
  2428  func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2429  	s.lock.RLock()
  2430  	syncing := !s.snapped
  2431  	s.lock.RUnlock()
  2432  
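        	// Bytecodes are requested both during the sync phase and during healing;
        	// route the delivery to whichever is currently active.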
  2433  	if syncing {
  2434  		return s.onByteCodes(peer, id, bytecodes)
  2435  	}
  2436  	return s.onHealByteCodes(peer, id, bytecodes)
  2437  }
  2438  
  2439  // onByteCodes is a callback method to invoke when a batch of contract
  2440  // bytecodes are received from a remote peer in the syncing phase.
  2441  func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2442  	var size common.StorageSize
  2443  	for _, code := range bytecodes {
  2444  		size += common.StorageSize(len(code))
  2445  	}
  2446  	logger := peer.Log().New("reqid", id)
  2447  	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2448  
  2449  	// Whether or not the response is valid, we can mark the peer as idle and
  2450  	// notify the scheduler to assign a new task. If the response is invalid,
  2451  	// we'll drop the peer in a bit.
  2452  	defer func() {
  2453  		s.lock.Lock()
  2454  		defer s.lock.Unlock()
  2455  		if _, ok := s.peers[peer.ID()]; ok {
  2456  			s.bytecodeIdlers[peer.ID()] = struct{}{}
  2457  		}
  2458  		select {
  2459  		case s.update <- struct{}{}:
  2460  		default:
  2461  		}
  2462  	}()
  2463  	s.lock.Lock()
  2464  	// Ensure the response is for a valid request
  2465  	req, ok := s.bytecodeReqs[id]
  2466  	if !ok {
  2467  		// Request stale, perhaps the peer timed out but came through in the end
  2468  		logger.Warn("Unexpected bytecode packet")
  2469  		s.lock.Unlock()
  2470  		return nil
  2471  	}
  2472  	delete(s.bytecodeReqs, id)
  2473  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2474  
  2475  	// Clean up the request timeout timer, we'll see how to proceed further based
  2476  	// on the actual delivered content
  2477  	if !req.timeout.Stop() {
  2478  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2479  		s.lock.Unlock()
  2480  		return nil
  2481  	}
  2482  
  2483  	// Response is valid, but check if peer is signalling that it does not have
  2484  	// the requested data. For bytecode range queries that means the peer is not
  2485  	// yet synced.
  2486  	if len(bytecodes) == 0 {
  2487  		logger.Debug("Peer rejected bytecode request")
  2488  		s.statelessPeers[peer.ID()] = struct{}{}
  2489  		s.lock.Unlock()
  2490  
  2491  		// Signal this request as failed, and ready for rescheduling
  2492  		s.scheduleRevertBytecodeRequest(req)
  2493  		return nil
  2494  	}
  2495  	s.lock.Unlock()
  2496  
  2497  	// Cross reference the requested bytecodes with the response to find gaps
  2498  	// that the serving node is missing
  2499  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2500  	hash := make([]byte, 32)
  2501  
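        	// Responses may omit bytecodes the peer is missing, but are assumed to
        	// preserve the order of the requested hashes, so match them positionally.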
  2502  	codes := make([][]byte, len(req.hashes))
  2503  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2504  		// Find the next hash that we've been served, leaving misses with nils
  2505  		hasher.Reset()
  2506  		hasher.Write(bytecodes[i])
  2507  		hasher.Read(hash)
  2508  
  2509  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2510  			j++
  2511  		}
  2512  		if j < len(req.hashes) {
  2513  			codes[j] = bytecodes[i]
  2514  			j++
  2515  			continue
  2516  		}
  2517  		// We've either run out of hashes, or got unrequested data
  2518  		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
  2519  		// Signal this request as failed, and ready for rescheduling
  2520  		s.scheduleRevertBytecodeRequest(req)
  2521  		return errors.New("unexpected bytecode")
  2522  	}
  2523  	// Response validated, send it to the scheduler for filling
  2524  	response := &bytecodeResponse{
  2525  		task:   req.task,
  2526  		hashes: req.hashes,
  2527  		codes:  codes,
  2528  	}
  2529  	select {
  2530  	case req.deliver <- response:
  2531  	case <-req.cancel:
  2532  	case <-req.stale:
  2533  	}
  2534  	return nil
  2535  }
  2536  
  2537  // OnStorage is a callback method to invoke when ranges of storage slots
  2538  // are received from a remote peer.
  2539  func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
  2540  	// Gather some trace stats to aid in debugging issues
  2541  	var (
  2542  		hashCount int
  2543  		slotCount int
  2544  		size      common.StorageSize
  2545  	)
  2546  	for _, hashset := range hashes {
  2547  		size += common.StorageSize(common.HashLength * len(hashset))
  2548  		hashCount += len(hashset)
  2549  	}
  2550  	for _, slotset := range slots {
  2551  		for _, slot := range slotset {
  2552  			size += common.StorageSize(len(slot))
  2553  		}
  2554  		slotCount += len(slotset)
  2555  	}
  2556  	for _, node := range proof {
  2557  		size += common.StorageSize(len(node))
  2558  	}
  2559  	logger := peer.Log().New("reqid", id)
  2560  	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)
  2561  
  2562  	// Whether or not the response is valid, we can mark the peer as idle and
  2563  	// notify the scheduler to assign a new task. If the response is invalid,
  2564  	// we'll drop the peer in a bit.
  2565  	defer func() {
  2566  		s.lock.Lock()
  2567  		defer s.lock.Unlock()
  2568  		if _, ok := s.peers[peer.ID()]; ok {
  2569  			s.storageIdlers[peer.ID()] = struct{}{}
  2570  		}
  2571  		select {
  2572  		case s.update <- struct{}{}:
  2573  		default:
  2574  		}
  2575  	}()
  2576  	s.lock.Lock()
  2577  	// Ensure the response is for a valid request
  2578  	req, ok := s.storageReqs[id]
  2579  	if !ok {
  2580  		// Request stale, perhaps the peer timed out but came through in the end
  2581  		logger.Warn("Unexpected storage ranges packet")
  2582  		s.lock.Unlock()
  2583  		return nil
  2584  	}
  2585  	delete(s.storageReqs, id)
  2586  	s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size))
  2587  
  2588  	// Clean up the request timeout timer, we'll see how to proceed further based
  2589  	// on the actual delivered content
  2590  	if !req.timeout.Stop() {
  2591  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2592  		s.lock.Unlock()
  2593  		return nil
  2594  	}
  2595  
  2596  	// Reject the response if the hash sets and slot sets don't match, or if the
  2597  	// peer sent more data than requested.
  2598  	if len(hashes) != len(slots) {
  2599  		s.lock.Unlock()
  2600  		s.scheduleRevertStorageRequest(req) // reschedule request
  2601  		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
  2602  		return errors.New("hash and slot set size mismatch")
  2603  	}
  2604  	if len(hashes) > len(req.accounts) {
  2605  		s.lock.Unlock()
  2606  		s.scheduleRevertStorageRequest(req) // reschedule request
  2607  		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
  2608  		return errors.New("hash set larger than requested")
  2609  	}
  2610  	// Response is valid, but check if peer is signalling that it does not have
  2611  	// the requested data. For storage range queries that means the state being
  2612  	// retrieved was either already pruned remotely, or the peer is not yet
  2613  	// synced to our head.
  2614  	if len(hashes) == 0 {
  2615  		logger.Debug("Peer rejected storage request")
  2616  		s.statelessPeers[peer.ID()] = struct{}{}
  2617  		s.lock.Unlock()
  2618  		s.scheduleRevertStorageRequest(req) // reschedule request
  2619  		return nil
  2620  	}
  2621  	s.lock.Unlock()
  2622  
  2623  	// Reconstruct the partial tries from the response and verify them
  2624  	var cont bool
  2625  
  2626  	for i := 0; i < len(hashes); i++ {
  2627  		// Convert the keys and proofs into an internal format
  2628  		keys := make([][]byte, len(hashes[i]))
  2629  		for j, key := range hashes[i] {
  2630  			keys[j] = common.CopyBytes(key[:])
  2631  		}
  2632  		nodes := make(light.NodeList, 0, len(proof))
  2633  		if i == len(hashes)-1 {
  2634  			for _, node := range proof {
  2635  				nodes = append(nodes, node)
  2636  			}
  2637  		}
  2638  		var err error
  2639  		if len(nodes) == 0 {
  2640  			// No proof has been attached, the response must cover the entire key
  2641  			// space and hash to the origin root.
  2642  			_, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
  2643  			if err != nil {
  2644  				s.scheduleRevertStorageRequest(req) // reschedule request
  2645  				logger.Warn("Storage slots failed proof", "err", err)
  2646  				return err
  2647  			}
  2648  		} else {
  2649  			// A proof was attached, the response is only partial, check that the
  2650  			// returned data is indeed part of the storage trie
  2651  			proofdb := nodes.NodeSet()
  2652  
  2653  			var end []byte
  2654  			if len(keys) > 0 {
  2655  				end = keys[len(keys)-1]
  2656  			}
  2657  			cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
  2658  			if err != nil {
  2659  				s.scheduleRevertStorageRequest(req) // reschedule request
  2660  				logger.Warn("Storage range failed proof", "err", err)
  2661  				return err
  2662  			}
  2663  		}
  2664  	}
  2665  	// Partial tries reconstructed, send them to the scheduler for storage filling
  2666  	response := &storageResponse{
  2667  		mainTask: req.mainTask,
  2668  		subTask:  req.subTask,
  2669  		accounts: req.accounts,
  2670  		roots:    req.roots,
  2671  		hashes:   hashes,
  2672  		slots:    slots,
  2673  		cont:     cont,
  2674  	}
  2675  	select {
  2676  	case req.deliver <- response:
  2677  	case <-req.cancel:
  2678  	case <-req.stale:
  2679  	}
  2680  	return nil
  2681  }
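
// A minimal sketch (hypothetical helper, not part of the upstream code) of how
// the boundary proof is scoped in the handler above: a peer may truncate only
// the last account's slot range, so the attached proof nodes are fed to the
// verifier for the final hash set alone, while every earlier set must hash to
// its storage root as a complete range with nil boundaries. The `cont` flag
// returned for that final set is what later spawns a follow-up sub-task.
func proofForSlotSet(index, sets int, proof [][]byte) [][]byte {
	if index == sets-1 {
		return proof // only the final slot set may be a partial range
	}
	return nil // earlier sets must be complete and are proven by their root alone
}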
  2682  
  2683  // OnTrieNodes is a callback method to invoke when a batch of trie nodes
  2684  // is received from a remote peer.
  2685  func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
  2686  	var size common.StorageSize
  2687  	for _, node := range trienodes {
  2688  		size += common.StorageSize(len(node))
  2689  	}
  2690  	logger := peer.Log().New("reqid", id)
  2691  	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)
  2692  
  2693  	// Whether or not the response is valid, we can mark the peer as idle and
  2694  	// notify the scheduler to assign a new task. If the response is invalid,
  2695  	// we'll drop the peer in a bit.
  2696  	defer func() {
  2697  		s.lock.Lock()
  2698  		defer s.lock.Unlock()
  2699  		if _, ok := s.peers[peer.ID()]; ok {
  2700  			s.trienodeHealIdlers[peer.ID()] = struct{}{}
  2701  		}
  2702  		select {
  2703  		case s.update <- struct{}{}:
  2704  		default:
  2705  		}
  2706  	}()
  2707  	s.lock.Lock()
  2708  	// Ensure the response is for a valid request
  2709  	req, ok := s.trienodeHealReqs[id]
  2710  	if !ok {
  2711  		// Request stale, perhaps the peer timed out but came through in the end
  2712  		logger.Warn("Unexpected trienode heal packet")
  2713  		s.lock.Unlock()
  2714  		return nil
  2715  	}
  2716  	delete(s.trienodeHealReqs, id)
  2717  	s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
  2718  
  2719  	// Clean up the request timeout timer, we'll see how to proceed further based
  2720  	// on the actual delivered content
  2721  	if !req.timeout.Stop() {
  2722  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2723  		s.lock.Unlock()
  2724  		return nil
  2725  	}
  2726  
  2727  	// Response is valid, but check if peer is signalling that it does not have
  2728  	// the requested data. For trienode heal queries that means the peer is not
  2729  	// yet synced.
  2730  	if len(trienodes) == 0 {
  2731  		logger.Debug("Peer rejected trienode heal request")
  2732  		s.statelessPeers[peer.ID()] = struct{}{}
  2733  		s.lock.Unlock()
  2734  
  2735  		// Signal this request as failed, and ready for rescheduling
  2736  		s.scheduleRevertTrienodeHealRequest(req)
  2737  		return nil
  2738  	}
  2739  	s.lock.Unlock()
  2740  
  2741  	// Cross reference the requested trienodes with the response to find gaps
  2742  	// that the serving node is missing
  2743  	var (
  2744  		hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2745  		hash   = make([]byte, 32)
  2746  		nodes  = make([][]byte, len(req.hashes))
  2747  		fills  uint64
  2748  	)
  2749  	for i, j := 0, 0; i < len(trienodes); i++ {
  2750  		// Find the next hash that we've been served, leaving misses with nils
  2751  		hasher.Reset()
  2752  		hasher.Write(trienodes[i])
  2753  		hasher.Read(hash)
  2754  
  2755  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2756  			j++
  2757  		}
  2758  		if j < len(req.hashes) {
  2759  			nodes[j] = trienodes[i]
  2760  			fills++
  2761  			j++
  2762  			continue
  2763  		}
  2764  		// We've either run out of hashes, or got unrequested data
  2765  		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
  2766  
  2767  		// Signal this request as failed, and ready for rescheduling
  2768  		s.scheduleRevertTrienodeHealRequest(req)
  2769  		return errors.New("unexpected healing trienode")
  2770  	}
  2771  	// Response validated, send it to the scheduler for filling
  2772  	atomic.AddUint64(&s.trienodeHealPend, fills)
  2773  	defer func() {
  2774  		atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1))
  2775  	}()
  2776  	response := &trienodeHealResponse{
  2777  		paths:  req.paths,
  2778  		task:   req.task,
  2779  		hashes: req.hashes,
  2780  		nodes:  nodes,
  2781  	}
  2782  	select {
  2783  	case req.deliver <- response:
  2784  	case <-req.cancel:
  2785  	case <-req.stale:
  2786  	}
  2787  	return nil
  2788  }
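
// A standalone sketch (hypothetical helper) of the two-pointer matching used
// above: responses preserve request order but may omit entries, so each served
// blob is hashed and advanced along the requested hash list, leaving nil gaps
// for hashes the peer did not serve. A blob whose hash matches no remaining
// request is treated as unrequested data and fails the whole delivery.
func matchServedBlobs(served [][]byte, want []common.Hash) ([][]byte, error) {
	var (
		hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState)
		hash   = make([]byte, 32)
		out    = make([][]byte, len(want))
	)
	for i, j := 0, 0; i < len(served); i++ {
		hasher.Reset()
		hasher.Write(served[i])
		hasher.Read(hash)
		for j < len(want) && !bytes.Equal(hash, want[j][:]) {
			j++
		}
		if j == len(want) {
			return nil, errors.New("unrequested blob in response")
		}
		out[j] = served[i]
		j++
	}
	return out, nil
}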
  2789  
  2790  // onHealByteCodes is a callback method to invoke when a batch of contract
  2791  // bytecodes is received from a remote peer in the healing phase.
  2792  func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2793  	var size common.StorageSize
  2794  	for _, code := range bytecodes {
  2795  		size += common.StorageSize(len(code))
  2796  	}
  2797  	logger := peer.Log().New("reqid", id)
  2798  	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2799  
  2800  	// Whether or not the response is valid, we can mark the peer as idle and
  2801  	// notify the scheduler to assign a new task. If the response is invalid,
  2802  	// we'll drop the peer in a bit.
  2803  	defer func() {
  2804  		s.lock.Lock()
  2805  		defer s.lock.Unlock()
  2806  		if _, ok := s.peers[peer.ID()]; ok {
  2807  			s.bytecodeHealIdlers[peer.ID()] = struct{}{}
  2808  		}
  2809  		select {
  2810  		case s.update <- struct{}{}:
  2811  		default:
  2812  		}
  2813  	}()
  2814  	s.lock.Lock()
  2815  	// Ensure the response is for a valid request
  2816  	req, ok := s.bytecodeHealReqs[id]
  2817  	if !ok {
  2818  		// Request stale, perhaps the peer timed out but came through in the end
  2819  		logger.Warn("Unexpected bytecode heal packet")
  2820  		s.lock.Unlock()
  2821  		return nil
  2822  	}
  2823  	delete(s.bytecodeHealReqs, id)
  2824  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2825  
  2826  	// Clean up the request timeout timer, we'll see how to proceed further based
  2827  	// on the actual delivered content
  2828  	if !req.timeout.Stop() {
  2829  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2830  		s.lock.Unlock()
  2831  		return nil
  2832  	}
  2833  
  2834  	// Response is valid, but check if peer is signalling that it does not have
  2835  	// the requested data. For bytecode heal queries that means the peer is not
  2836  	// yet synced.
  2837  	if len(bytecodes) == 0 {
  2838  		logger.Debug("Peer rejected bytecode heal request")
  2839  		s.statelessPeers[peer.ID()] = struct{}{}
  2840  		s.lock.Unlock()
  2841  
  2842  		// Signal this request as failed, and ready for rescheduling
  2843  		s.scheduleRevertBytecodeHealRequest(req)
  2844  		return nil
  2845  	}
  2846  	s.lock.Unlock()
  2847  
  2848  	// Cross reference the requested bytecodes with the response to find gaps
  2849  	// that the serving node is missing
  2850  	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
  2851  	hash := make([]byte, 32)
  2852  
  2853  	codes := make([][]byte, len(req.hashes))
  2854  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2855  		// Find the next hash that we've been served, leaving misses with nils
  2856  		hasher.Reset()
  2857  		hasher.Write(bytecodes[i])
  2858  		hasher.Read(hash)
  2859  
  2860  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2861  			j++
  2862  		}
  2863  		if j < len(req.hashes) {
  2864  			codes[j] = bytecodes[i]
  2865  			j++
  2866  			continue
  2867  		}
  2868  		// We've either ran out of hashes, or got unrequested data
  2869  		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
  2870  		// Signal this request as failed, and ready for rescheduling
  2871  		s.scheduleRevertBytecodeHealRequest(req)
  2872  		return errors.New("unexpected healing bytecode")
  2873  	}
  2874  	// Response validated, send it to the scheduler for filling
  2875  	response := &bytecodeHealResponse{
  2876  		task:   req.task,
  2877  		hashes: req.hashes,
  2878  		codes:  codes,
  2879  	}
  2880  	select {
  2881  	case req.deliver <- response:
  2882  	case <-req.cancel:
  2883  	case <-req.stale:
  2884  	}
  2885  	return nil
  2886  }
  2887  
  2888  // onHealState is a callback method to invoke when a flat state (account
  2889  // or storage slot) is downloaded during the healing stage. The flat states
  2890  // can be persisted blindly and can be fixed later in the generation stage.
  2891  // Note it is not safe for concurrent use; callers must handle synchronization externally.
  2892  func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
  2893  	if len(paths) == 1 {
  2894  		var account types.StateAccount
  2895  		if err := rlp.DecodeBytes(value, &account); err != nil {
  2896  			return nil // Returning the error here would drop the remote peer
  2897  		}
  2898  		blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash)
  2899  		rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob)
  2900  		s.accountHealed += 1
  2901  		s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob))
  2902  	}
  2903  	if len(paths) == 2 {
  2904  		rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
  2905  		s.storageHealed += 1
  2906  		s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
  2907  	}
  2908  	if s.stateWriter.ValueSize() > ethdb.IdealBatchSize {
  2909  		s.stateWriter.Write() // It's fine to ignore the error here
  2910  		s.stateWriter.Reset()
  2911  	}
  2912  	return nil
  2913  }
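
// A minimal sketch (hypothetical helper) of the accounting in onHealState: a
// healed account is keyed by its single-byte snapshot prefix plus the 32-byte
// account hash and stores the re-encoded slim-RLP blob, while a healed storage
// slot is keyed by a prefix plus the account hash and the slot hash and stores
// the raw value.
func healedEntrySize(paths [][]byte, blob []byte) common.StorageSize {
	switch len(paths) {
	case 1: // account leaf: key = prefix ++ keccak(address)
		return common.StorageSize(1 + common.HashLength + len(blob))
	case 2: // storage leaf: key = prefix ++ keccak(address) ++ keccak(slot)
		return common.StorageSize(1 + 2*common.HashLength + len(blob))
	default:
		return 0 // onHealState only persists 1- or 2-element paths
	}
}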
  2914  
  2915  // hashSpace is the total size of the 256 bit hash space for accounts.
  2916  var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
  2917  
  2918  // report calculates various status reports and provides them to the user.
  2919  func (s *Syncer) report(force bool) {
  2920  	if len(s.tasks) > 0 {
  2921  		s.reportSyncProgress(force)
  2922  		return
  2923  	}
  2924  	s.reportHealProgress(force)
  2925  }
  2926  
  2927  // reportSyncProgress calculates various status reports and provides them to the user.
  2928  func (s *Syncer) reportSyncProgress(force bool) {
  2929  	// Don't report all the events, just occasionally
  2930  	if !force && time.Since(s.logTime) < 8*time.Second {
  2931  		return
  2932  	}
  2933  	// Don't report anything until we have made meaningful progress
  2934  	synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
  2935  	if synced == 0 {
  2936  		return
  2937  	}
  2938  	accountGaps := new(big.Int)
  2939  	for _, task := range s.tasks {
  2940  		accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
  2941  	}
  2942  	accountFills := new(big.Int).Sub(hashSpace, accountGaps)
  2943  	if accountFills.BitLen() == 0 {
  2944  		return
  2945  	}
  2946  	s.logTime = time.Now()
  2947  	estBytes := float64(new(big.Int).Div(
  2948  		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
  2949  		accountFills,
  2950  	).Uint64())
  2951  	// Don't report anything until we have made meaningful progress
  2952  	if estBytes < 1.0 {
  2953  		return
  2954  	}
  2955  	elapsed := time.Since(s.startTime)
  2956  	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
  2957  
  2958  	// Create a mega progress report
  2959  	var (
  2960  		progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
  2961  		accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountSynced), s.accountBytes.TerminalString())
  2962  		storage  = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageSynced), s.storageBytes.TerminalString())
  2963  		bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeSynced), s.bytecodeBytes.TerminalString())
  2964  	)
  2965  	log.Info("State sync in progress", "synced", progress, "state", synced,
  2966  		"accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
  2967  }
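
// A worked sketch (hypothetical helper) of the extrapolation above: the bytes
// downloaded so far are scaled from the slice of the account hash space that
// the tasks have already covered up to the full 2^256 space, and the ETA
// scales the elapsed time by the same ratio. For example, with 40GiB synced
// and a quarter of the hash space covered (accountFills = hashSpace/4), the
// estimate is roughly 160GiB in total and about three times the elapsed time
// remains. The caller must ensure synced is non-zero.
func extrapolateSync(synced common.StorageSize, accountFills *big.Int, elapsed time.Duration) (estBytes float64, eta time.Duration) {
	estBytes = float64(new(big.Int).Div(
		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
		accountFills,
	).Uint64())
	eta = elapsed/time.Duration(synced)*time.Duration(estBytes) - elapsed
	return estBytes, eta
}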
  2968  
  2969  // reportHealProgress calculates various status reports and provides them to the user.
  2970  func (s *Syncer) reportHealProgress(force bool) {
  2971  	// Don't report all the events, just occasionally
  2972  	if !force && time.Since(s.logTime) < 8*time.Second {
  2973  		return
  2974  	}
  2975  	s.logTime = time.Now()
  2976  
  2977  	// Create a mega progress report
  2978  	var (
  2979  		trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString())
  2980  		bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString())
  2981  		accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString())
  2982  		storage  = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString())
  2983  	)
  2984  	log.Info("State heal in progress", "accounts", accounts, "slots", storage,
  2985  		"codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
  2986  }
  2987  
  2988  // estimateRemainingSlots tries to determine roughly how many slots are left in
  2989  // a contract storage, based on the number of keys and the last hash. This method
  2990  // assumes that the hashes are lexicographically ordered and evenly distributed.
  2991  func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
  2992  	if last == (common.Hash{}) {
  2993  		return 0, errors.New("last hash empty")
  2994  	}
  2995  	space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
  2996  	space.Div(space, last.Big())
  2997  	if !space.IsUint64() {
  2998  		// Gigantic address space probably due to too few or malicious slots
  2999  		return 0, errors.New("too few slots for estimation")
  3000  	}
  3001  	return space.Uint64() - uint64(hashes), nil
  3002  }
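
// A worked example of the estimate above (hypothetical helper): if a response
// carried 1024 slot hashes and the last hash sits roughly a quarter of the way
// through the hash space (last ≈ 2^254), then space ≈ 1024 * 2^256 / 2^254 ≈
// 4096, so about 3072 slots are estimated to remain in the contract.
func exampleRemainingSlots() (uint64, error) {
	quarterWay := new(big.Int).Lsh(big.NewInt(1), 254) // last hash ~1/4 into the space
	return estimateRemainingSlots(1024, common.BigToHash(quarterWay))
}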
  3003  
  3004  // capacitySort implements the Sort interface, allowing sorting by peer message
  3005  // throughput. Note, callers should use sort.Reverse to get the desired effect
  3006  // of highest capacity being at the front.
  3007  type capacitySort struct {
  3008  	ids  []string
  3009  	caps []int
  3010  }
  3011  
  3012  func (s *capacitySort) Len() int {
  3013  	return len(s.ids)
  3014  }
  3015  
  3016  func (s *capacitySort) Less(i, j int) bool {
  3017  	return s.caps[i] < s.caps[j]
  3018  }
  3019  
  3020  func (s *capacitySort) Swap(i, j int) {
  3021  	s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
  3022  	s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
  3023  }
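
// A minimal usage sketch mirroring how the task-assignment routines earlier in
// this file consume capacitySort (the peer IDs and estimated capacities are
// hypothetical inputs): sorting in reverse puts the highest-throughput peer
// first, so it is offered the next task before slower ones.
func rankPeersByCapacity(ids []string, caps []int) []string {
	ranked := &capacitySort{ids: ids, caps: caps}
	sort.Sort(sort.Reverse(ranked)) // highest capacity first
	return ranked.ids
}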
  3024  
  3025  // healRequestSort implements the Sort interface, allowing trienode heal
  3026  // requests to be sorted, which is a prerequisite for merging storage requests.
  3027  type healRequestSort struct {
  3028  	paths     []string
  3029  	hashes    []common.Hash
  3030  	syncPaths []trie.SyncPath
  3031  }
  3032  
  3033  func (t *healRequestSort) Len() int {
  3034  	return len(t.hashes)
  3035  }
  3036  
  3037  func (t *healRequestSort) Less(i, j int) bool {
  3038  	a := t.syncPaths[i]
  3039  	b := t.syncPaths[j]
  3040  	switch bytes.Compare(a[0], b[0]) {
  3041  	case -1:
  3042  		return true
  3043  	case 1:
  3044  		return false
  3045  	}
  3046  	// identical first part
  3047  	if len(a) < len(b) {
  3048  		return true
  3049  	}
  3050  	if len(b) < len(a) {
  3051  		return false
  3052  	}
  3053  	if len(a) == 2 {
  3054  		return bytes.Compare(a[1], b[1]) < 0
  3055  	}
  3056  	return false
  3057  }
  3058  
  3059  func (t *healRequestSort) Swap(i, j int) {
  3060  	t.paths[i], t.paths[j] = t.paths[j], t.paths[i]
  3061  	t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i]
  3062  	t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i]
  3063  }
  3064  
  3065  // Merge merges the pathsets, so that several storage requests concerning the
  3066  // same account are merged into one, to reduce bandwidth.
  3067  // Note: this operation is moot unless t has first been sorted.
  3068  func (t *healRequestSort) Merge() []TrieNodePathSet {
  3069  	var result []TrieNodePathSet
  3070  	for _, path := range t.syncPaths {
  3071  		pathset := TrieNodePathSet([][]byte(path))
  3072  		if len(path) == 1 {
  3073  			// It's an account reference.
  3074  			result = append(result, pathset)
  3075  		} else {
  3076  			// It's a storage reference.
  3077  			end := len(result) - 1
  3078  			if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) {
  3079  				// The account doesn't match last, create a new entry.
  3080  				result = append(result, pathset)
  3081  			} else {
  3082  				// It's the same account as the previous one, add to the storage
  3083  				// paths of that request.
  3084  				result[end] = append(result[end], pathset[1])
  3085  			}
  3086  		}
  3087  	}
  3088  	return result
  3089  }
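
// For illustration (hypothetical data): once sorted, sync paths such as
//
//	{accA}, {accA, slot1}, {accA, slot2}, {accB, slot9}
//
// merge into the path sets
//
//	{accA}, {accA, slot1, slot2}, {accB, slot9}
//
// so all healing trienode requests that touch accA's storage trie travel in a
// single TrieNodePathSet.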
  3090  
  3091  // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates
  3092  // the TrieNodePathSets and merges paths that belong to the same account path.
  3093  func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) {
  3094  	var syncPaths []trie.SyncPath
  3095  	for _, path := range paths {
  3096  		syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path)))
  3097  	}
  3098  	n := &healRequestSort{paths, hashes, syncPaths}
  3099  	sort.Sort(n)
  3100  	pathsets := n.Merge()
  3101  	return n.paths, n.hashes, n.syncPaths, pathsets
  3102  }