github.com/klaytn/klaytn@v1.10.2/node/cn/snap/sync.go

     1  // Modifications Copyright 2022 The klaytn Authors
     2  // Copyright 2020 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from eth/protocols/snap/sync.go (2022/06/29).
    19  // Modified and improved for the klaytn development.
    20  
    21  package snap
    22  
    23  import (
    24  	"bytes"
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  	"math/big"
    29  	"math/rand"
    30  	"sort"
    31  	"sync"
    32  	"time"
    33  
    34  	"github.com/klaytn/klaytn/blockchain/state"
    35  	"github.com/klaytn/klaytn/blockchain/types/account"
    36  	"github.com/klaytn/klaytn/common"
    37  	"github.com/klaytn/klaytn/common/math"
    38  	"github.com/klaytn/klaytn/crypto"
    39  	"github.com/klaytn/klaytn/event"
    40  	"github.com/klaytn/klaytn/log"
    41  	"github.com/klaytn/klaytn/networks/p2p/msgrate"
    42  	"github.com/klaytn/klaytn/rlp"
    43  	"github.com/klaytn/klaytn/storage/database"
    44  	"github.com/klaytn/klaytn/storage/statedb"
    45  	"golang.org/x/crypto/sha3"
    46  )
    47  
    48  var logger = log.NewModuleLogger(log.SnapshotSync)
    49  
    50  var (
    51  	// emptyRoot is the known root hash of an empty trie.
    52  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    53  
    54  	// emptyCode is the known hash of the empty EVM bytecode.
    55  	emptyCode = crypto.Keccak256Hash(nil)
    56  )
    57  
    58  const (
    59  	// minRequestSize is the minimum number of bytes to request from a remote peer.
    60  	// This number is used as the low cap for account and storage range requests.
    61  	// Bytecode and trienode are limited inherently by item count (1).
    62  	minRequestSize = 64 * 1024
    63  
    64  	// maxRequestSize is the maximum number of bytes to request from a remote peer.
    65  	// This number is used as the high cap for account and storage range requests.
    66  	// Bytecode and trienode are limited more explicitly by the caps below.
    67  	maxRequestSize = 512 * 1024
    68  
    69  	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
    70  	// single query. If this number is too low, we're not filling responses fully
    71  	// and waste round trip times. If it's too high, we're capping responses and
    72  	// waste bandwidth.
    73  	//
    74  // Deployed bytecodes are currently capped at 24KB, so the minimum request
    75  	// size should be maxRequestSize / 24K. Assuming that most contracts do not
    76  	// come close to that, requesting 4x should be a good approximation.
    77  	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4
    78  
    79  	// maxTrieRequestCount is the maximum number of trie node blobs to request in
    80  	// a single query. If this number is too low, we're not filling responses fully
    81  	// and waste round trip times. If it's too high, we're capping responses and
    82  	// waste bandwidth.
    83  	maxTrieRequestCount = maxRequestSize / 512
    84  )
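// With the values above, maxCodeRequestCount = 512*1024/(24*1024)*4 = 21*4 = 84
// bytecodes per query, and maxTrieRequestCount = 512*1024/512 = 1024 trie nodes
// per query (integer division throughout).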
    85  
    86  var (
    87  	// accountConcurrency is the number of chunks to split the account trie into
    88  	// to allow concurrent retrievals.
    89  	accountConcurrency = 16
    90  
    91  // storageConcurrency is the number of chunks to split a large contract
    92  	// storage trie into to allow concurrent retrievals.
    93  	storageConcurrency = 16
    94  )
    95  
    96  // ErrCancelled is returned from snap syncing if the operation was prematurely
    97  // terminated.
    98  var ErrCancelled = errors.New("sync cancelled")
    99  
   100  // accountRequest tracks a pending account range request to ensure responses are
   101  // to actual requests and to validate any security constraints.
   102  //
   103  // Concurrency note: account requests and responses are handled concurrently from
   104  // the main runloop to allow Merkle proof verifications on the peer's thread and
   105  // to drop on invalid response. The request struct must contain all the data to
   106  // construct the response without accessing runloop internals (i.e. task). That
   107  // is only included to allow the runloop to match a response to the task being
   108  // synced without having yet another set of maps.
   109  type accountRequest struct {
   110  	peer string    // Peer to which this request is assigned
   111  	id   uint64    // Request ID of this request
   112  	time time.Time // Timestamp when the request was sent
   113  
   114  	deliver chan *accountResponse // Channel to deliver successful response on
   115  	revert  chan *accountRequest  // Channel to deliver request failure on
   116  	cancel  chan struct{}         // Channel to track sync cancellation
   117  	timeout *time.Timer           // Timer to track delivery timeout
   118  	stale   chan struct{}         // Channel to signal the request was dropped
   119  
   120  	origin common.Hash // First account requested to allow continuation checks
   121  	limit  common.Hash // Last account requested to allow non-overlapping chunking
   122  
   123  	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
   124  }
   125  
   126  // accountResponse is an already Merkle-verified remote response to an account
   127  // range request. It contains the subtrie for the requested account range and
   128  // the database that's going to be filled with the internal nodes on commit.
   129  type accountResponse struct {
   130  	task *accountTask // Task which this request is filling
   131  
   132  	hashes   []common.Hash     // Account hashes in the returned range
   133  	accounts []account.Account // Expanded accounts in the returned range
   134  
   135  	cont bool // Whether the account range has a continuation
   136  }
   137  
   138  // bytecodeRequest tracks a pending bytecode request to ensure responses are to
   139  // actual requests and to validate any security constraints.
   140  //
   141  // Concurrency note: bytecode requests and responses are handled concurrently from
   142  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   143  // to drop on invalid response. The request struct must contain all the data to
   144  // construct the response without accessing runloop internals (i.e. task). That
   145  // is only included to allow the runloop to match a response to the task being
   146  // synced without having yet another set of maps.
   147  type bytecodeRequest struct {
   148  	peer string    // Peer to which this request is assigned
   149  	id   uint64    // Request ID of this request
   150  	time time.Time // Timestamp when the request was sent
   151  
   152  	deliver chan *bytecodeResponse // Channel to deliver successful response on
   153  	revert  chan *bytecodeRequest  // Channel to deliver request failure on
   154  	cancel  chan struct{}          // Channel to track sync cancellation
   155  	timeout *time.Timer            // Timer to track delivery timeout
   156  	stale   chan struct{}          // Channel to signal the request was dropped
   157  
   158  	hashes []common.Hash // Bytecode hashes to validate responses
   159  	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
   160  }
   161  
   162  // bytecodeResponse is an already verified remote response to a bytecode request.
   163  type bytecodeResponse struct {
   164  	task *accountTask // Task which this request is filling
   165  
   166  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   167  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   168  }
   169  
   170  // storageRequest tracks a pending storage ranges request to ensure responses are
   171  // to actual requests and to validate any security constraints.
   172  //
   173  // Concurrency note: storage requests and responses are handled concurrently from
   174  // the main runloop to allow Merkle proof verifications on the peer's thread and
   175  // to drop on invalid response. The request struct must contain all the data to
   176  // construct the response without accessing runloop internals (i.e. tasks). That
   177  // is only included to allow the runloop to match a response to the task being
   178  // synced without having yet another set of maps.
   179  type storageRequest struct {
   180  	peer string    // Peer to which this request is assigned
   181  	id   uint64    // Request ID of this request
   182  	time time.Time // Timestamp when the request was sent
   183  
   184  	deliver chan *storageResponse // Channel to deliver successful response on
   185  	revert  chan *storageRequest  // Channel to deliver request failure on
   186  	cancel  chan struct{}         // Channel to track sync cancellation
   187  	timeout *time.Timer           // Timer to track delivery timeout
   188  	stale   chan struct{}         // Channel to signal the request was dropped
   189  
   190  	accounts []common.Hash // Account hashes to validate responses
   191  	roots    []common.Hash // Storage roots to validate responses
   192  
   193  	origin common.Hash // First storage slot requested to allow continuation checks
   194  	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking
   195  
   196  	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
   197  	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
   198  }
   199  
   200  // storageResponse is an already Merkle-verified remote response to a storage
   201  // range request. It contains the subtries for the requested storage ranges and
   202  // the databases that are going to be filled with the internal nodes on commit.
   203  type storageResponse struct {
   204  	mainTask *accountTask // Task which this response belongs to
   205  	subTask  *storageTask // Task which this response is filling
   206  
   207  	accounts []common.Hash // Account hashes requested, may be only partially filled
   208  	roots    []common.Hash // Storage roots requested, may be only partially filled
   209  
   210  	hashes [][]common.Hash // Storage slot hashes in the returned range
   211  	slots  [][][]byte      // Storage slot values in the returned range
   212  
   213  	cont bool // Whether the last storage range has a continuation
   214  }
   215  
   216  // trienodeHealRequest tracks a pending state trie request to ensure responses
   217  // are to actual requests and to validate any security constraints.
   218  //
   219  // Concurrency note: trie node requests and responses are handled concurrently from
   220  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   221  // to drop on invalid response. The request struct must contain all the data to
   222  // construct the response without accessing runloop internals (i.e. task). That
   223  // is only included to allow the runloop to match a response to the task being
   224  // synced without having yet another set of maps.
   225  type trienodeHealRequest struct {
   226  	peer string    // Peer to which this request is assigned
   227  	id   uint64    // Request ID of this request
   228  	time time.Time // Timestamp when the request was sent
   229  
   230  	deliver chan *trienodeHealResponse // Channel to deliver successful response on
   231  	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
   232  	cancel  chan struct{}              // Channel to track sync cancellation
   233  	timeout *time.Timer                // Timer to track delivery timeout
   234  	stale   chan struct{}              // Channel to signal the request was dropped
   235  
   236  	hashes []common.Hash      // Trie node hashes to validate responses
   237  	paths  []statedb.SyncPath // Trie node paths requested for rescheduling
   238  
   239  	task *healTask // Task which this request is filling (only access fields through the runloop!!)
   240  }
   241  
   242  // trienodeHealResponse is an already verified remote response to a trie node request.
   243  type trienodeHealResponse struct {
   244  	task *healTask // Task which this request is filling
   245  
   246  	hashes []common.Hash      // Hashes of the trie nodes to avoid double hashing
   247  	paths  []statedb.SyncPath // Trie node paths requested for rescheduling missing ones
   248  	nodes  [][]byte           // Actual trie nodes to store into the database (nil = missing)
   249  }
   250  
   251  // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
   252  // actual requests and to validate any security constraints.
   253  //
   254  // Concurrency note: bytecode requests and responses are handled concurrently from
   255  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   256  // to drop on invalid response. The request struct must contain all the data to
   257  // construct the response without accessing runloop internals (i.e. task). That
   258  // is only included to allow the runloop to match a response to the task being
   259  // synced without having yet another set of maps.
   260  type bytecodeHealRequest struct {
   261  	peer string    // Peer to which this request is assigned
   262  	id   uint64    // Request ID of this request
   263  	time time.Time // Timestamp when the request was sent
   264  
   265  	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
   266  	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
   267  	cancel  chan struct{}              // Channel to track sync cancellation
   268  	timeout *time.Timer                // Timer to track delivery timeout
   269  	stale   chan struct{}              // Channel to signal the request was dropped
   270  
   271  	hashes []common.Hash // Bytecode hashes to validate responses
   272  	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
   273  }
   274  
   275  // bytecodeHealResponse is an already verified remote response to a bytecode request.
   276  type bytecodeHealResponse struct {
   277  	task *healTask // Task which this request is filling
   278  
   279  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   280  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   281  }
   282  
   283  // accountTask represents the sync task for a chunk of the account snapshot.
   284  type accountTask struct {
   285  	// These fields get serialized to leveldb on shutdown
   286  	Next     common.Hash                    // Next account to sync in this interval
   287  	Last     common.Hash                    // Last account to sync in this interval
   288  	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
   289  
   290  	// These fields are internals used during runtime
   291  	req  *accountRequest  // Pending request to fill this task
   292  	res  *accountResponse // Validated response filling this task
   293  	pend int              // Number of pending subtasks for this round
   294  
   295  	needCode  []bool // Flags whether the filling accounts need code retrieval
   296  	needState []bool // Flags whether the filling accounts need storage retrieval
   297  	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing
   298  
   299  	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
   300  	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
   301  
   302  	// TODO-Klaytn-Snapsync consider using a stack trie
   303  	genTrie *statedb.Trie // Node generator from storage slots
   304  	trieDb  *statedb.Database
   305  
   306  	done bool // Flag whether the task can be removed
   307  }
   308  
   309  // storageTask represents the sync task for a chunk of the storage snapshot.
   310  type storageTask struct {
   311  	Next common.Hash // Next storage slot to sync in this interval
   312  	Last common.Hash // Last storage slot to sync in this interval
   313  
   314  	// These fields are internals used during runtime
   315  	root common.Hash     // Storage root hash for this instance
   316  	req  *storageRequest // Pending request to fill this task
   317  
   318  	// TODO-Klaytn-Snapsync consider using a stack trie
   319  	genTrie *statedb.Trie // Node generator from storage slots
   320  	trieDb  *statedb.Database
   321  
   322  	done bool // Flag whether the task can be removed
   323  }
   324  
   325  // healTask represents the sync task for healing the snap-synced chunk boundaries.
   326  type healTask struct {
   327  	scheduler *statedb.TrieSync // State trie sync scheduler defining the tasks
   328  
   329  	trieTasks map[common.Hash]statedb.SyncPath // Set of trie node tasks currently queued for retrieval
   330  	codeTasks map[common.Hash]struct{}         // Set of byte code tasks currently queued for retrieval
   331  }
   332  
   333  // SyncProgress is a database entry to allow suspending and resuming a snapshot state
   334  // sync. As opposed to full and fast sync, there is no way to restart a suspended
   335  // snap sync without prior knowledge of the suspension point.
   336  type SyncProgress struct {
   337  	Tasks []*accountTask // The suspended account tasks (contract tasks within)
   338  
   339  	// Status report during syncing phase
   340  	AccountSynced  uint64             // Number of accounts downloaded
   341  	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   342  	BytecodeSynced uint64             // Number of bytecodes downloaded
   343  	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   344  	StorageSynced  uint64             // Number of storage slots downloaded
   345  	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   346  
   347  	// Status report during healing phase
   348  	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
   349  	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   350  	BytecodeHealSynced uint64             // Number of bytecodes downloaded
   351  	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   352  }
   353  
   354  // SyncPending is analogous to SyncProgress, but it's used to report on pending
   355  // ephemeral sync progress that doesn't get persisted into the database.
   356  type SyncPending struct {
   357  	TrienodeHeal uint64 // Number of state trie nodes pending
   358  	BytecodeHeal uint64 // Number of bytecodes pending
   359  }
   360  
   361  // SyncPeer abstracts out the methods required for a peer to be synced against
   362  // with the goal of allowing the construction of mock peers without the full
   363  // blown networking.
   364  type SyncPeer interface {
   365  	// ID retrieves the peer's unique identifier.
   366  	ID() string
   367  
   368  	// RequestAccountRange fetches a batch of accounts rooted in a specific account
   369  	// trie, starting with the origin.
   370  	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
   371  
   372  	// RequestStorageRanges fetches a batch of storage slots belonging to one or
   373  	// more accounts. If slots from only one account are requested, an origin marker
   374  	// may also be used to retrieve from there.
   375  	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
   376  
   377  	// RequestByteCodes fetches a batch of bytecodes by hash.
   378  	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
   379  
   380  	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
   381  	// a specific state trie.
   382  	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
   383  
   384  	// Log retrieves the peer's own contextual logger.
   385  	Log() log.Logger
   386  }
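// A minimal sketch of a mock peer satisfying this interface (the type and its
// no-op behaviour are hypothetical, for tests/illustration only):
//
//	type mockPeer struct{ id string }
//
//	func (p *mockPeer) ID() string      { return p.id }
//	func (p *mockPeer) Log() log.Logger { return logger }
//
//	func (p *mockPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
//		return nil // a real mock would answer through the syncer's delivery path
//	}
//
//	func (p *mockPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
//		return nil
//	}
//
//	func (p *mockPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
//		return nil
//	}
//
//	func (p *mockPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
//		return nil
//	}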
   387  
   388  // Syncer is a Klaytn account and storage trie syncer based on snapshots and
   389  // the snap protocol. Its purpose is to download all the accounts and storage
   390  // slots from remote peers and reassemble chunks of the state trie, on top of
   391  // which a state sync can be run to fix any gaps / overlaps.
   392  //
   393  // Every network request has a variety of failure events:
   394  //   - The peer disconnects after task assignment, failing to send the request
   395  //   - The peer disconnects after sending the request, before delivering on it
   396  //   - The peer remains connected, but does not deliver a response in time
   397  //   - The peer delivers a stale response after a previous timeout
   398  //   - The peer delivers a refusal to serve the requested state
   399  type Syncer struct {
   400  	db database.DBManager // Database to store the trie nodes into (and dedup)
   401  
   402  	root    common.Hash    // Current state trie root being synced
   403  	tasks   []*accountTask // Current account task set being synced
   404  	snapped bool           // Flag to signal that snap phase is done
   405  	healer  *healTask      // Current state healing task being executed
   406  	update  chan struct{}  // Notification channel for possible sync progression
   407  
   408  	peers    map[string]SyncPeer // Currently active peers to download from
   409  	peerJoin *event.Feed         // Event feed to react to peers joining
   410  	peerDrop *event.Feed         // Event feed to react to peers dropping
   411  	rates    *msgrate.Trackers   // Message throughput rates for peers
   412  
   413  	// Request tracking during syncing phase
   414  	statelessPeers map[string]struct{} // Peers that failed to deliver state data
   415  	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
   416  	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   417  	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests
   418  
   419  	accountReqs  map[uint64]*accountRequest  // Account requests currently running
   420  	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
   421  	storageReqs  map[uint64]*storageRequest  // Storage requests currently running
   422  
   423  	accountSynced  uint64             // Number of accounts downloaded
   424  	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   425  	bytecodeSynced uint64             // Number of bytecodes downloaded
   426  	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   427  	storageSynced  uint64             // Number of storage slots downloaded
   428  	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   429  
   430  	extProgress *SyncProgress // Progress that can be exposed to external callers
   431  
   432  	// Request tracking during healing phase
   433  	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
   434  	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   435  
   436  	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
   437  	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
   438  
   439  	trienodeHealSynced uint64             // Number of state trie nodes downloaded
   440  	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   441  	trienodeHealDups   uint64             // Number of state trie nodes already processed
   442  	trienodeHealNops   uint64             // Number of state trie nodes not requested
   443  	bytecodeHealSynced uint64             // Number of bytecodes downloaded
   444  	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   445  	bytecodeHealDups   uint64             // Number of bytecodes already processed
   446  	bytecodeHealNops   uint64             // Number of bytecodes not requested
   447  
   448  	stateWriter        database.SnapshotDBBatch // Shared batch writer used for persisting raw states
   449  	accountHealed      uint64                   // Number of accounts downloaded during the healing stage
   450  	accountHealedBytes common.StorageSize       // Number of raw account bytes persisted to disk during the healing stage
   451  	storageHealed      uint64                   // Number of storage slots downloaded during the healing stage
   452  	storageHealedBytes common.StorageSize       // Number of raw storage bytes persisted to disk during the healing stage
   453  
   454  	startTime time.Time // Time instance when snapshot sync started
   455  	logTime   time.Time // Time instance when status was last reported
   456  
   457  	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
   458  	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
   459  }
   460  
   461  // NewSyncer creates a new snapshot syncer to download the Klaytn state over the
   462  // snap protocol.
   463  func NewSyncer(db database.DBManager) *Syncer {
   464  	return &Syncer{
   465  		db: db,
   466  
   467  		peers:    make(map[string]SyncPeer),
   468  		peerJoin: new(event.Feed),
   469  		peerDrop: new(event.Feed),
   470  		rates:    msgrate.NewTrackers(logger.NewWith("proto", "snap")),
   471  		update:   make(chan struct{}, 1),
   472  
   473  		accountIdlers:  make(map[string]struct{}),
   474  		storageIdlers:  make(map[string]struct{}),
   475  		bytecodeIdlers: make(map[string]struct{}),
   476  
   477  		accountReqs:  make(map[uint64]*accountRequest),
   478  		storageReqs:  make(map[uint64]*storageRequest),
   479  		bytecodeReqs: make(map[uint64]*bytecodeRequest),
   480  
   481  		trienodeHealIdlers: make(map[string]struct{}),
   482  		bytecodeHealIdlers: make(map[string]struct{}),
   483  
   484  		trienodeHealReqs: make(map[uint64]*trienodeHealRequest),
   485  		bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest),
   486  		stateWriter:      db.NewSnapshotDBBatch(),
   487  
   488  		extProgress: new(SyncProgress),
   489  	}
   490  }
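// A typical wiring, sketched with hypothetical caller-side names (db, peer,
// stateRoot and the cancel channel are assumptions, not part of this file):
//
//	syncer := NewSyncer(db)
//	if err := syncer.Register(peer); err != nil {
//		// peer was already registered
//	}
//	cancel := make(chan struct{})
//	err := syncer.Sync(stateRoot, cancel) // blocks until synced or cancelled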
   491  
   492  // Register injects a new data source into the syncer's peerset.
   493  func (s *Syncer) Register(peer SyncPeer) error {
   494  	// Make sure the peer is not registered yet
   495  	id := peer.ID()
   496  
   497  	s.lock.Lock()
   498  	if _, ok := s.peers[id]; ok {
   499  		logger.Error("Snap peer already registered", "id", id)
   500  
   501  		s.lock.Unlock()
   502  		return errors.New("already registered")
   503  	}
   504  	s.peers[id] = peer
   505  	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))
   506  
   507  	// Mark the peer as idle, even if no sync is running
   508  	s.accountIdlers[id] = struct{}{}
   509  	s.storageIdlers[id] = struct{}{}
   510  	s.bytecodeIdlers[id] = struct{}{}
   511  	s.trienodeHealIdlers[id] = struct{}{}
   512  	s.bytecodeHealIdlers[id] = struct{}{}
   513  	s.lock.Unlock()
   514  
   515  	// Notify any active syncs that a new peer can be assigned data
   516  	s.peerJoin.Send(id)
   517  	return nil
   518  }
   519  
   520  // Unregister removes an existing data source from the syncer's peerset.
   521  func (s *Syncer) Unregister(id string) error {
   522  	// Remove all traces of the peer from the registry
   523  	s.lock.Lock()
   524  	if _, ok := s.peers[id]; !ok {
   525  		logger.Error("Snap peer not registered", "id", id)
   526  
   527  		s.lock.Unlock()
   528  		return errors.New("not registered")
   529  	}
   530  	delete(s.peers, id)
   531  	s.rates.Untrack(id)
   532  
   533  	// Remove status markers, even if no sync is running
   534  	delete(s.statelessPeers, id)
   535  
   536  	delete(s.accountIdlers, id)
   537  	delete(s.storageIdlers, id)
   538  	delete(s.bytecodeIdlers, id)
   539  	delete(s.trienodeHealIdlers, id)
   540  	delete(s.bytecodeHealIdlers, id)
   541  	s.lock.Unlock()
   542  
   543  	// Notify any active syncs that pending requests need to be reverted
   544  	s.peerDrop.Send(id)
   545  	return nil
   546  }
   547  
   548  // Sync starts (or resumes a previous) sync cycle to iterate over a state trie
   549  // with the given root and reconstruct the nodes based on the snapshot leaves.
   550  // Previously downloaded segments will not be redownloaded or fixed, rather any
   551  // errors will be healed after the leaves are fully accumulated.
   552  func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
   553  	// Move the trie root from any previous value, revert stateless markers for
   554  	// any peers and initialize the syncer if it was not yet run
   555  	s.lock.Lock()
   556  	s.root = root
   557  	s.healer = &healTask{
   558  		scheduler: state.NewStateSync(root, s.db, nil, nil, s.onHealState),
   559  		trieTasks: make(map[common.Hash]statedb.SyncPath),
   560  		codeTasks: make(map[common.Hash]struct{}),
   561  	}
   562  	s.statelessPeers = make(map[string]struct{})
   563  	s.lock.Unlock()
   564  
   565  	if s.startTime == (time.Time{}) {
   566  		s.startTime = time.Now()
   567  	}
   568  	// Retrieve the previous sync status from LevelDB and abort if already synced
   569  	s.loadSyncStatus()
   570  	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   571  		logger.Debug("Snapshot sync already completed")
   572  		return nil
   573  	}
   574  	defer func() { // Persist any progress, independent of failure
   575  		for _, task := range s.tasks {
   576  			s.forwardAccountTask(task)
   577  		}
   578  		s.cleanAccountTasks()
   579  		s.saveSyncStatus()
   580  	}()
   581  
   582  	logger.Debug("Starting snapshot sync cycle", "root", root)
   583  
   584  	// Flush out the last committed raw states
   585  	defer func() {
   586  		if s.stateWriter.ValueSize() > 0 {
   587  			s.stateWriter.Write()
   588  			s.stateWriter.Reset()
   589  		}
   590  	}()
   591  	defer s.report(true)
   592  
   593  	// Whether sync completed or not, disregard any future packets
   594  	defer func() {
   595  		logger.Debug("Terminating snapshot sync cycle", "root", root)
   596  		s.lock.Lock()
   597  		s.accountReqs = make(map[uint64]*accountRequest)
   598  		s.storageReqs = make(map[uint64]*storageRequest)
   599  		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
   600  		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
   601  		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
   602  		s.lock.Unlock()
   603  	}()
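	// Note: deferred functions run in LIFO order, so among the defers above the
	// request-map reset runs first and the task persistence in saveSyncStatus
	// runs last.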
   604  	// Keep scheduling sync tasks
   605  	peerJoin := make(chan string, 16)
   606  	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
   607  	defer peerJoinSub.Unsubscribe()
   608  
   609  	peerDrop := make(chan string, 16)
   610  	peerDropSub := s.peerDrop.Subscribe(peerDrop)
   611  	defer peerDropSub.Unsubscribe()
   612  
   613  	// Create a set of unique channels for this sync cycle. We need these to be
   614  	// ephemeral so a data race doesn't accidentally deliver something stale on
   615  	// a persistent channel across syncs (yup, this happened)
   616  	var (
   617  		accountReqFails      = make(chan *accountRequest)
   618  		storageReqFails      = make(chan *storageRequest)
   619  		bytecodeReqFails     = make(chan *bytecodeRequest)
   620  		accountResps         = make(chan *accountResponse)
   621  		storageResps         = make(chan *storageResponse)
   622  		bytecodeResps        = make(chan *bytecodeResponse)
   623  		trienodeHealReqFails = make(chan *trienodeHealRequest)
   624  		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
   625  		trienodeHealResps    = make(chan *trienodeHealResponse)
   626  		bytecodeHealResps    = make(chan *bytecodeHealResponse)
   627  	)
   628  	for {
   629  		// Remove all completed tasks and terminate sync if everything's done
   630  		s.cleanStorageTasks()
   631  		s.cleanAccountTasks()
   632  		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   633  			return nil
   634  		}
   635  		// Assign all the data retrieval tasks to any free peers
   636  		s.assignAccountTasks(accountResps, accountReqFails, cancel)
   637  		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
   638  		s.assignStorageTasks(storageResps, storageReqFails, cancel)
   639  
   640  		if len(s.tasks) == 0 {
   641  			// Sync phase done, run heal phase
   642  			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
   643  			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
   644  		}
   645  		// Update sync progress
   646  		s.lock.Lock()
   647  		s.extProgress = &SyncProgress{
   648  			AccountSynced:      s.accountSynced,
   649  			AccountBytes:       s.accountBytes,
   650  			BytecodeSynced:     s.bytecodeSynced,
   651  			BytecodeBytes:      s.bytecodeBytes,
   652  			StorageSynced:      s.storageSynced,
   653  			StorageBytes:       s.storageBytes,
   654  			TrienodeHealSynced: s.trienodeHealSynced,
   655  			TrienodeHealBytes:  s.trienodeHealBytes,
   656  			BytecodeHealSynced: s.bytecodeHealSynced,
   657  			BytecodeHealBytes:  s.bytecodeHealBytes,
   658  		}
   659  		s.lock.Unlock()
   660  		// Wait for something to happen
   661  		select {
   662  		case <-s.update:
   663  			// Something happened (new peer, delivery, timeout), recheck tasks
   664  		case <-peerJoin:
   665  			// A new peer joined, try to schedule it new tasks
   666  		case id := <-peerDrop:
   667  			s.revertRequests(id)
   668  		case <-cancel:
   669  			return ErrCancelled
   670  
   671  		case req := <-accountReqFails:
   672  			s.revertAccountRequest(req)
   673  		case req := <-bytecodeReqFails:
   674  			s.revertBytecodeRequest(req)
   675  		case req := <-storageReqFails:
   676  			s.revertStorageRequest(req)
   677  		case req := <-trienodeHealReqFails:
   678  			s.revertTrienodeHealRequest(req)
   679  		case req := <-bytecodeHealReqFails:
   680  			s.revertBytecodeHealRequest(req)
   681  
   682  		case res := <-accountResps:
   683  			s.processAccountResponse(res)
   684  		case res := <-bytecodeResps:
   685  			s.processBytecodeResponse(res)
   686  		case res := <-storageResps:
   687  			s.processStorageResponse(res)
   688  		case res := <-trienodeHealResps:
   689  			s.processTrienodeHealResponse(res)
   690  		case res := <-bytecodeHealResps:
   691  			s.processBytecodeHealResponse(res)
   692  		}
   693  		// Report stats if something meaningful happened
   694  		s.report(false)
   695  	}
   696  }
   697  
   698  // loadSyncStatus retrieves a previously aborted sync status from the database,
   699  // or generates a fresh one if none is available.
   700  func (s *Syncer) loadSyncStatus() {
   701  	var progress SyncProgress
   702  
   703  	if status := s.db.ReadSnapshotSyncStatus(); status != nil {
   704  		if err := json.Unmarshal(status, &progress); err != nil {
   705  			logger.Error("Failed to decode snap sync status", "err", err)
   706  		} else {
   707  			for _, task := range progress.Tasks {
   708  				logger.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
   709  			}
   710  			s.tasks = progress.Tasks
   711  			for _, task := range s.tasks {
   712  				task.trieDb = statedb.NewDatabase(s.db)
   713  				task.genTrie, _ = statedb.NewTrie(common.Hash{}, task.trieDb)
   714  
   715  				for _, subtasks := range task.SubTasks {
   716  					for _, subtask := range subtasks {
   717  						subtask.trieDb = statedb.NewDatabase(s.db)
   718  						subtask.genTrie, _ = statedb.NewTrie(common.Hash{}, subtask.trieDb)
   719  					}
   720  				}
   721  			}
   722  			s.lock.Lock()
   723  			defer s.lock.Unlock()
   724  
   725  			s.snapped = len(s.tasks) == 0
   726  
   727  			s.accountSynced = progress.AccountSynced
   728  			s.accountBytes = progress.AccountBytes
   729  			s.bytecodeSynced = progress.BytecodeSynced
   730  			s.bytecodeBytes = progress.BytecodeBytes
   731  			s.storageSynced = progress.StorageSynced
   732  			s.storageBytes = progress.StorageBytes
   733  
   734  			s.trienodeHealSynced = progress.TrienodeHealSynced
   735  			s.trienodeHealBytes = progress.TrienodeHealBytes
   736  			s.bytecodeHealSynced = progress.BytecodeHealSynced
   737  			s.bytecodeHealBytes = progress.BytecodeHealBytes
   738  			return
   739  		}
   740  	}
   741  	// Either we've failed to decode the previous state, or there was none.
   742  	// Start a fresh sync by chunking up the account range and scheduling
   743  	// them for retrieval.
   744  	s.tasks = nil
   745  	s.accountSynced, s.accountBytes = 0, 0
   746  	s.bytecodeSynced, s.bytecodeBytes = 0, 0
   747  	s.storageSynced, s.storageBytes = 0, 0
   748  	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
   749  	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0
   750  
   751  	var next common.Hash
   752  	step := new(big.Int).Sub(
   753  		new(big.Int).Div(
   754  			new(big.Int).Exp(common.Big2, common.Big256, nil),
   755  			big.NewInt(int64(accountConcurrency)),
   756  		), common.Big1,
   757  	)
   758  	for i := 0; i < accountConcurrency; i++ {
   759  		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
   760  		if i == accountConcurrency-1 {
   761  			// Make sure we don't overflow if the step is not a proper divisor
   762  			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
   763  		}
   764  		db := statedb.NewDatabase(s.db)
   765  		trie, _ := statedb.NewTrie(common.Hash{}, db)
   766  		s.tasks = append(s.tasks, &accountTask{
   767  			Next:     next,
   768  			Last:     last,
   769  			SubTasks: make(map[common.Hash][]*storageTask),
   770  			genTrie:  trie,
   771  			trieDb:   db,
   772  		})
   773  		logger.Debug("Created account sync task", "from", next, "last", last)
   774  		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
   775  	}
   776  }
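// With the default accountConcurrency of 16, step = 2^256/16 - 1, so a fresh
// sync partitions the account hash space as:
//
//	task  0: 0x0000...0000 .. 0x0fff...ffff
//	task  1: 0x1000...0000 .. 0x1fff...ffff
//	...
//	task 15: 0xf000...0000 .. 0xffff...ffff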
   777  
   778  // saveSyncStatus marshals the remaining sync tasks into leveldb.
   779  func (s *Syncer) saveSyncStatus() {
   780  	// Serialize any partial progress to disk before spinning down
   781  	for _, task := range s.tasks {
   782  		if err := task.trieDb.Commit(task.genTrie.Hash(), false, 0); err != nil {
   783  			logger.Error("Failed to persist account slots", "err", err)
   784  		}
   785  		for _, subtasks := range task.SubTasks {
   786  			for _, subtask := range subtasks {
   787  				if err := subtask.trieDb.Commit(subtask.genTrie.Hash(), false, 0); err != nil {
   788  					logger.Error("Failed to persist storage slots", "err", err)
   789  				}
   790  			}
   791  		}
   792  	}
   793  	// Store the actual progress markers
   794  	progress := &SyncProgress{
   795  		Tasks:              s.tasks,
   796  		AccountSynced:      s.accountSynced,
   797  		AccountBytes:       s.accountBytes,
   798  		BytecodeSynced:     s.bytecodeSynced,
   799  		BytecodeBytes:      s.bytecodeBytes,
   800  		StorageSynced:      s.storageSynced,
   801  		StorageBytes:       s.storageBytes,
   802  		TrienodeHealSynced: s.trienodeHealSynced,
   803  		TrienodeHealBytes:  s.trienodeHealBytes,
   804  		BytecodeHealSynced: s.bytecodeHealSynced,
   805  		BytecodeHealBytes:  s.bytecodeHealBytes,
   806  	}
   807  	status, err := json.Marshal(progress)
   808  	if err != nil {
   809  		panic(err) // This can only fail during implementation
   810  	}
   811  	s.db.WriteSnapshotSyncStatus(status)
   812  }
   813  
   814  // Progress returns the snap sync status statistics.
   815  func (s *Syncer) Progress() (*SyncProgress, *SyncPending) {
   816  	s.lock.Lock()
   817  	defer s.lock.Unlock()
   818  	pending := new(SyncPending)
   819  	if s.healer != nil {
   820  		pending.TrienodeHeal = uint64(len(s.healer.trieTasks))
   821  		pending.BytecodeHeal = uint64(len(s.healer.codeTasks))
   822  	}
   823  	return s.extProgress, pending
   824  }
   825  
   826  // cleanAccountTasks removes account range retrieval tasks that have already been
   827  // completed.
   828  func (s *Syncer) cleanAccountTasks() {
   829  	// If the sync was already done before, don't even bother
   830  	if len(s.tasks) == 0 {
   831  		return
   832  	}
   833  	// Sync wasn't finished previously, check for any task that can be finalized
   834  	for i := 0; i < len(s.tasks); i++ {
   835  		if s.tasks[i].done {
   836  			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
   837  			i--
   838  		}
   839  	}
   840  	// If everything was just finalized, generate the account trie and start healing
   841  	if len(s.tasks) == 0 {
   842  		s.lock.Lock()
   843  		s.snapped = true
   844  		s.lock.Unlock()
   845  
   846  		// Push the final sync report
   847  		s.reportSyncProgress(true)
   848  	}
   849  }
   850  
   851  // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
   852  // within, cleaning any that have been completed.
   853  func (s *Syncer) cleanStorageTasks() {
   854  	for _, task := range s.tasks {
   855  		for account, subtasks := range task.SubTasks {
   856  			// Remove storage range retrieval tasks that completed
   857  			for j := 0; j < len(subtasks); j++ {
   858  				if subtasks[j].done {
   859  					subtasks = append(subtasks[:j], subtasks[j+1:]...)
   860  					j--
   861  				}
   862  			}
   863  			if len(subtasks) > 0 {
   864  				task.SubTasks[account] = subtasks
   865  				continue
   866  			}
   867  			// If all storage chunks are done, mark the account as done too
   868  			for j, hash := range task.res.hashes {
   869  				if hash == account {
   870  					task.needState[j] = false
   871  				}
   872  			}
   873  			delete(task.SubTasks, account)
   874  			task.pend--
   875  
   876  			// If this was the last pending task, forward the account task
   877  			if task.pend == 0 {
   878  				s.forwardAccountTask(task)
   879  			}
   880  		}
   881  	}
   882  }
   883  
   884  // assignAccountTasks attempts to match idle peers to pending account range
   885  // retrievals.
   886  func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
   887  	s.lock.Lock()
   888  	defer s.lock.Unlock()
   889  
   890  	// Sort the peers by download capacity to use faster ones if many available
   891  	idlers := &capacitySort{
   892  		ids:  make([]string, 0, len(s.accountIdlers)),
   893  		caps: make([]int, 0, len(s.accountIdlers)),
   894  	}
   895  	targetTTL := s.rates.TargetTimeout()
   896  	for id := range s.accountIdlers {
   897  		if _, ok := s.statelessPeers[id]; ok {
   898  			continue
   899  		}
   900  		idlers.ids = append(idlers.ids, id)
   901  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
   902  	}
   903  	if len(idlers.ids) == 0 {
   904  		// TODO-Klaytn-SnapSync enhance logging if necessary
   905  		return
   906  	}
   907  	sort.Sort(sort.Reverse(idlers))
   908  
   909  	// Iterate over all the tasks and try to find a pending one
   910  	for _, task := range s.tasks {
   911  		// Skip any tasks already filling
   912  		if task.req != nil || task.res != nil {
   913  			continue
   914  		}
   915  		// Task pending retrieval, try to find an idle peer. If no such peer
   916  		// exists, we probably assigned tasks for all (or they are stateless).
   917  		// Abort the entire assignment mechanism.
   918  		if len(idlers.ids) == 0 {
   919  			return
   920  		}
   921  		var (
   922  			idle = idlers.ids[0]
   923  			peer = s.peers[idle]
   924  			cap  = idlers.caps[0]
   925  		)
   926  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
   927  
   928  		// Matched a pending task to an idle peer, allocate a unique request id
   929  		var reqid uint64
   930  		for {
   931  			reqid = uint64(rand.Int63())
   932  			if reqid == 0 {
   933  				continue
   934  			}
   935  			if _, ok := s.accountReqs[reqid]; ok {
   936  				continue
   937  			}
   938  			break
   939  		}
   940  		// Generate the network query and send it to the peer
   941  		req := &accountRequest{
   942  			peer:    idle,
   943  			id:      reqid,
   944  			time:    time.Now(),
   945  			deliver: success,
   946  			revert:  fail,
   947  			cancel:  cancel,
   948  			stale:   make(chan struct{}),
   949  			origin:  task.Next,
   950  			limit:   task.Last,
   951  			task:    task,
   952  		}
   953  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
   954  			peer.Log().Debug("Account range request timed out", "reqid", reqid)
   955  			s.rates.Update(idle, AccountRangeMsg, 0, 0)
   956  			s.scheduleRevertAccountRequest(req)
   957  		})
   958  		s.accountReqs[reqid] = req
   959  		delete(s.accountIdlers, idle)
   960  
   961  		s.pend.Add(1)
   962  		go func(root common.Hash) {
   963  			defer s.pend.Done()
   964  
   965  			// Attempt to send the remote request and revert if it fails
   966  			if cap > maxRequestSize {
   967  				cap = maxRequestSize
   968  			}
   969  			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
   970  				cap = minRequestSize
   971  			}
   972  			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
   973  				peer.Log().Debug("Failed to request account range", "err", err)
   974  				s.scheduleRevertAccountRequest(req)
   975  			}
   976  		}(s.root)
   977  
   978  		// Inject the request into the task to block further assignments
   979  		task.req = req
   980  	}
   981  }
   982  
   983  // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
   984  func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
   985  	s.lock.Lock()
   986  	defer s.lock.Unlock()
   987  
   988  	// Sort the peers by download capacity to use faster ones if many available
   989  	idlers := &capacitySort{
   990  		ids:  make([]string, 0, len(s.bytecodeIdlers)),
   991  		caps: make([]int, 0, len(s.bytecodeIdlers)),
   992  	}
   993  	targetTTL := s.rates.TargetTimeout()
   994  	for id := range s.bytecodeIdlers {
   995  		if _, ok := s.statelessPeers[id]; ok {
   996  			continue
   997  		}
   998  		idlers.ids = append(idlers.ids, id)
   999  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1000  	}
  1001  	if len(idlers.ids) == 0 {
  1002  		return
  1003  	}
  1004  	sort.Sort(sort.Reverse(idlers))
  1005  
  1006  	// Iterate over all the tasks and try to find a pending one
  1007  	for _, task := range s.tasks {
  1008  		// Skip any tasks not in the bytecode retrieval phase
  1009  		if task.res == nil {
  1010  			continue
  1011  		}
  1012  		// Skip tasks that are already retrieving (or done with) all codes
  1013  		if len(task.codeTasks) == 0 {
  1014  			continue
  1015  		}
  1016  		// Task pending retrieval, try to find an idle peer. If no such peer
  1017  		// exists, we probably assigned tasks for all (or they are stateless).
  1018  		// Abort the entire assignment mechanism.
  1019  		if len(idlers.ids) == 0 {
  1020  			return
  1021  		}
  1022  		var (
  1023  			idle = idlers.ids[0]
  1024  			peer = s.peers[idle]
  1025  			cap  = idlers.caps[0]
  1026  		)
  1027  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1028  
  1029  		// Matched a pending task to an idle peer, allocate a unique request id
  1030  		var reqid uint64
  1031  		for {
  1032  			reqid = uint64(rand.Int63())
  1033  			if reqid == 0 {
  1034  				continue
  1035  			}
  1036  			if _, ok := s.bytecodeReqs[reqid]; ok {
  1037  				continue
  1038  			}
  1039  			break
  1040  		}
  1041  		// Generate the network query and send it to the peer
  1042  		if cap > maxCodeRequestCount {
  1043  			cap = maxCodeRequestCount
  1044  		}
  1045  		hashes := make([]common.Hash, 0, cap)
  1046  		for hash := range task.codeTasks {
  1047  			delete(task.codeTasks, hash)
  1048  			hashes = append(hashes, hash)
  1049  			if len(hashes) >= cap {
  1050  				break
  1051  			}
  1052  		}
  1053  		req := &bytecodeRequest{
  1054  			peer:    idle,
  1055  			id:      reqid,
  1056  			time:    time.Now(),
  1057  			deliver: success,
  1058  			revert:  fail,
  1059  			cancel:  cancel,
  1060  			stale:   make(chan struct{}),
  1061  			hashes:  hashes,
  1062  			task:    task,
  1063  		}
  1064  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1065  			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
  1066  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1067  			s.scheduleRevertBytecodeRequest(req)
  1068  		})
  1069  		s.bytecodeReqs[reqid] = req
  1070  		delete(s.bytecodeIdlers, idle)
  1071  
  1072  		s.pend.Add(1)
  1073  		go func() {
  1074  			defer s.pend.Done()
  1075  
  1076  			// Attempt to send the remote request and revert if it fails
  1077  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1078  				logger.Debug("Failed to request bytecodes", "err", err)
  1079  				s.scheduleRevertBytecodeRequest(req)
  1080  			}
  1081  		}()
  1082  	}
  1083  }
  1084  
  1085  // assignStorageTasks attempts to match idle peers to pending storage range
  1086  // retrievals.
  1087  func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
  1088  	s.lock.Lock()
  1089  	defer s.lock.Unlock()
  1090  
  1091  	// Sort the peers by download capacity to use faster ones if many available
  1092  	idlers := &capacitySort{
  1093  		ids:  make([]string, 0, len(s.storageIdlers)),
  1094  		caps: make([]int, 0, len(s.storageIdlers)),
  1095  	}
  1096  	targetTTL := s.rates.TargetTimeout()
  1097  	for id := range s.storageIdlers {
  1098  		if _, ok := s.statelessPeers[id]; ok {
  1099  			continue
  1100  		}
  1101  		idlers.ids = append(idlers.ids, id)
  1102  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
  1103  	}
  1104  	if len(idlers.ids) == 0 {
  1105  		return
  1106  	}
  1107  	sort.Sort(sort.Reverse(idlers))
  1108  
  1109  	// Iterate over all the tasks and try to find a pending one
  1110  	for _, task := range s.tasks {
  1111  		// Skip any tasks not in the storage retrieval phase
  1112  		if task.res == nil {
  1113  			continue
  1114  		}
  1115  		// Skip tasks that are already retrieving (or done with) all small states
  1116  		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
  1117  			continue
  1118  		}
  1119  		// Task pending retrieval, try to find an idle peer. If no such peer
  1120  		// exists, we probably assigned tasks for all (or they are stateless).
  1121  		// Abort the entire assignment mechanism.
  1122  		if len(idlers.ids) == 0 {
  1123  			return
  1124  		}
  1125  		var (
  1126  			idle = idlers.ids[0]
  1127  			peer = s.peers[idle]
  1128  			cap  = idlers.caps[0]
  1129  		)
  1130  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1131  
  1132  		// Matched a pending task to an idle peer, allocate a unique request id
  1133  		var reqid uint64
  1134  		for {
  1135  			reqid = uint64(rand.Int63())
  1136  			if reqid == 0 {
  1137  				continue
  1138  			}
  1139  			if _, ok := s.storageReqs[reqid]; ok {
  1140  				continue
  1141  			}
  1142  			break
  1143  		}
  1144  		// Generate the network query and send it to the peer. If there are
  1145  		// large contract tasks pending, complete those before diving into
  1146  		// even more new contracts.
  1147  		if cap > maxRequestSize {
  1148  			cap = maxRequestSize
  1149  		}
  1150  		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
  1151  			cap = minRequestSize
  1152  		}
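		// Budget roughly one account's storage set per KB of allowed response
		// size; the 1024 divisor is a heuristic (an assumption inferred from
		// the clamps above, it yields 64..512 accounts per request).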
  1153  		storageSets := cap / 1024
  1154  
  1155  		var (
  1156  			accounts = make([]common.Hash, 0, storageSets)
  1157  			roots    = make([]common.Hash, 0, storageSets)
  1158  			subtask  *storageTask
  1159  		)
  1160  		for account, subtasks := range task.SubTasks {
  1161  			for _, st := range subtasks {
  1162  				// Skip any subtasks already filling
  1163  				if st.req != nil {
  1164  					continue
  1165  				}
  1166  				// Found an incomplete storage chunk, schedule it
  1167  				accounts = append(accounts, account)
  1168  				roots = append(roots, st.root)
  1169  				subtask = st
  1170  				break // Large contract chunks are downloaded individually
  1171  			}
  1172  			if subtask != nil {
  1173  				break // Large contract chunks are downloaded individually
  1174  			}
  1175  		}
  1176  		if subtask == nil {
  1177  			// No large contract requires retrieval, but small ones are available
  1178  			for account, root := range task.stateTasks {
  1179  				delete(task.stateTasks, account)
  1180  
  1181  				accounts = append(accounts, account)
  1182  				roots = append(roots, root)
  1183  
  1184  				if len(accounts) >= storageSets {
  1185  					break
  1186  				}
  1187  			}
  1188  		}
  1189  		// If nothing was found, it means this task is actually already fully
  1190  		// retrieving, but large contracts are hard to detect. Skip to the next.
  1191  		if len(accounts) == 0 {
  1192  			continue
  1193  		}
  1194  		req := &storageRequest{
  1195  			peer:     idle,
  1196  			id:       reqid,
  1197  			time:     time.Now(),
  1198  			deliver:  success,
  1199  			revert:   fail,
  1200  			cancel:   cancel,
  1201  			stale:    make(chan struct{}),
  1202  			accounts: accounts,
  1203  			roots:    roots,
  1204  			mainTask: task,
  1205  			subTask:  subtask,
  1206  		}
  1207  		if subtask != nil {
  1208  			req.origin = subtask.Next
  1209  			req.limit = subtask.Last
  1210  		}
  1211  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1212  			peer.Log().Debug("Storage request timed out", "reqid", reqid)
  1213  			s.rates.Update(idle, StorageRangesMsg, 0, 0)
  1214  			s.scheduleRevertStorageRequest(req)
  1215  		})
  1216  		s.storageReqs[reqid] = req
  1217  		delete(s.storageIdlers, idle)
  1218  
  1219  		s.pend.Add(1)
  1220  		go func(root common.Hash) {
  1221  			defer s.pend.Done()
  1222  
  1223  			// Attempt to send the remote request and revert if it fails
  1224  			var origin, limit []byte
  1225  			if subtask != nil {
  1226  				origin, limit = req.origin[:], req.limit[:]
  1227  			}
  1228  			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
  1229  				logger.Debug("Failed to request storage", "err", err)
  1230  				s.scheduleRevertStorageRequest(req)
  1231  			}
  1232  		}(s.root)
  1233  
  1234  		// Inject the request into the subtask to block further assignments
  1235  		if subtask != nil {
  1236  			subtask.req = req
  1237  		}
  1238  	}
  1239  }
  1240  
  1241  // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
  1242  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1243  func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
  1244  	s.lock.Lock()
  1245  	defer s.lock.Unlock()
  1246  
  1247  	// Sort the peers by download capacity to use faster ones if many available
  1248  	idlers := &capacitySort{
  1249  		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
  1250  		caps: make([]int, 0, len(s.trienodeHealIdlers)),
  1251  	}
  1252  	targetTTL := s.rates.TargetTimeout()
  1253  	for id := range s.trienodeHealIdlers {
  1254  		if _, ok := s.statelessPeers[id]; ok {
  1255  			continue
  1256  		}
  1257  		idlers.ids = append(idlers.ids, id)
  1258  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
  1259  	}
  1260  	if len(idlers.ids) == 0 {
  1261  		return
  1262  	}
  1263  	sort.Sort(sort.Reverse(idlers))
  1264  
  1265  	// Iterate over pending tasks and try to find a peer to retrieve with
  1266  	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1267  		// If there are not enough trie tasks queued to fully assign, fill the
  1268  		// queue from the state sync scheduler. The trie syncer schedules these
  1269  		// together with bytecodes, so we need to queue them combined.
  1270  		var (
  1271  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1272  			want = maxTrieRequestCount + maxCodeRequestCount
  1273  		)
  1274  		if have < want {
  1275  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1276  			for i, hash := range nodes {
  1277  				s.healer.trieTasks[hash] = paths[i]
  1278  			}
  1279  			for _, hash := range codes {
  1280  				s.healer.codeTasks[hash] = struct{}{}
  1281  			}
  1282  		}
  1283  		// If all the heal tasks are bytecodes or already downloading, bail
  1284  		if len(s.healer.trieTasks) == 0 {
  1285  			return
  1286  		}
  1287  		// Task pending retrieval, try to find an idle peer. If no such peer
  1288  		// exists, we probably assigned tasks for all (or they are stateless).
  1289  		// Abort the entire assignment mechanism.
  1290  		if len(idlers.ids) == 0 {
  1291  			return
  1292  		}
  1293  		var (
  1294  			idle = idlers.ids[0]
  1295  			peer = s.peers[idle]
  1296  			cap  = idlers.caps[0]
  1297  		)
  1298  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1299  
  1300  		// Matched a pending task to an idle peer, allocate a unique request id
  1301  		var reqid uint64
  1302  		for {
  1303  			reqid = uint64(rand.Int63())
  1304  			if reqid == 0 {
  1305  				continue
  1306  			}
  1307  			if _, ok := s.trienodeHealReqs[reqid]; ok {
  1308  				continue
  1309  			}
  1310  			break
  1311  		}
  1312  		// Generate the network query and send it to the peer
  1313  		if cap > maxTrieRequestCount {
  1314  			cap = maxTrieRequestCount
  1315  		}
  1316  		var (
  1317  			hashes   = make([]common.Hash, 0, cap)
  1318  			paths    = make([]statedb.SyncPath, 0, cap)
  1319  			pathsets = make([]TrieNodePathSet, 0, cap)
  1320  		)
  1321  		for hash, pathset := range s.healer.trieTasks {
  1322  			delete(s.healer.trieTasks, hash)
  1323  
  1324  			hashes = append(hashes, hash)
  1325  			paths = append(paths, pathset)
  1326  			pathsets = append(pathsets, [][]byte(pathset)) // TODO-Klaytn-SnapSync group requests by account hash
  1327  
  1328  			if len(hashes) >= cap {
  1329  				break
  1330  			}
  1331  		}
  1332  		req := &trienodeHealRequest{
  1333  			peer:    idle,
  1334  			id:      reqid,
  1335  			time:    time.Now(),
  1336  			deliver: success,
  1337  			revert:  fail,
  1338  			cancel:  cancel,
  1339  			stale:   make(chan struct{}),
  1340  			hashes:  hashes,
  1341  			paths:   paths,
  1342  			task:    s.healer,
  1343  		}
  1344  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1345  			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
  1346  			s.rates.Update(idle, TrieNodesMsg, 0, 0)
  1347  			s.scheduleRevertTrienodeHealRequest(req)
  1348  		})
  1349  		s.trienodeHealReqs[reqid] = req
  1350  		delete(s.trienodeHealIdlers, idle)
  1351  
  1352  		s.pend.Add(1)
  1353  		go func(root common.Hash) {
  1354  			defer s.pend.Done()
  1355  
  1356  			// Attempt to send the remote request and revert if it fails
  1357  			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
  1358  				logger.Debug("Failed to request trienode healers", "err", err)
  1359  				s.scheduleRevertTrienodeHealRequest(req)
  1360  			}
  1361  		}(s.root)
  1362  	}
  1363  }
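
// Both heal assigners allocate request ids with the same rejection loop. A
// minimal sketch (illustrative only; the inflight map stands in for
// s.trienodeHealReqs / s.bytecodeHealReqs): draw random 63-bit ids until one
// is non-zero and not already tracked.
func newRequestID(inflight map[uint64]struct{}) uint64 {
	for {
		id := uint64(rand.Int63())
		if id == 0 {
			continue // zero is reserved as the "unset" id
		}
		if _, ok := inflight[id]; ok {
			continue // collision with an in-flight request
		}
		return id
	}
}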
  1364  
  1365  // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
  1366  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1367  func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
  1368  	s.lock.Lock()
  1369  	defer s.lock.Unlock()
  1370  
  1371  	// Sort the peers by download capacity to use faster ones if many available
  1372  	idlers := &capacitySort{
  1373  		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
  1374  		caps: make([]int, 0, len(s.bytecodeHealIdlers)),
  1375  	}
  1376  	targetTTL := s.rates.TargetTimeout()
  1377  	for id := range s.bytecodeHealIdlers {
  1378  		if _, ok := s.statelessPeers[id]; ok {
  1379  			continue
  1380  		}
  1381  		idlers.ids = append(idlers.ids, id)
  1382  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1383  	}
  1384  	if len(idlers.ids) == 0 {
  1385  		return
  1386  	}
  1387  	sort.Sort(sort.Reverse(idlers))
  1388  
  1389  	// Iterate over pending tasks and try to find a peer to retrieve with
  1390  	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1391  		// If there are not enough bytecode tasks queued to fully assign, fill the
  1392  		// queue from the state sync scheduler. The trie syncer schedules these
  1393  		// together with trie nodes, so we need to queue them combined.
  1394  		var (
  1395  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1396  			want = maxTrieRequestCount + maxCodeRequestCount
  1397  		)
  1398  		if have < want {
  1399  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1400  			for i, hash := range nodes {
  1401  				s.healer.trieTasks[hash] = paths[i]
  1402  			}
  1403  			for _, hash := range codes {
  1404  				s.healer.codeTasks[hash] = struct{}{}
  1405  			}
  1406  		}
  1407  		// If all the heal tasks are trienodes or already downloading, bail
  1408  		if len(s.healer.codeTasks) == 0 {
  1409  			return
  1410  		}
  1411  		// Task pending retrieval, try to find an idle peer. If no such peer
  1412  		// exists, we probably assigned tasks for all (or they are stateless).
  1413  		// Abort the entire assignment mechanism.
  1414  		if len(idlers.ids) == 0 {
  1415  			return
  1416  		}
  1417  		var (
  1418  			idle = idlers.ids[0]
  1419  			peer = s.peers[idle]
  1420  			cap  = idlers.caps[0]
  1421  		)
  1422  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1423  
  1424  		// Matched a pending task to an idle peer, allocate a unique request id
  1425  		var reqid uint64
  1426  		for {
  1427  			reqid = uint64(rand.Int63())
  1428  			if reqid == 0 {
  1429  				continue
  1430  			}
  1431  			if _, ok := s.bytecodeHealReqs[reqid]; ok {
  1432  				continue
  1433  			}
  1434  			break
  1435  		}
  1436  		// Generate the network query and send it to the peer
  1437  		if cap > maxCodeRequestCount {
  1438  			cap = maxCodeRequestCount
  1439  		}
  1440  		hashes := make([]common.Hash, 0, cap)
  1441  		for hash := range s.healer.codeTasks {
  1442  			delete(s.healer.codeTasks, hash)
  1443  
  1444  			hashes = append(hashes, hash)
  1445  			if len(hashes) >= cap {
  1446  				break
  1447  			}
  1448  		}
  1449  		req := &bytecodeHealRequest{
  1450  			peer:    idle,
  1451  			id:      reqid,
  1452  			time:    time.Now(),
  1453  			deliver: success,
  1454  			revert:  fail,
  1455  			cancel:  cancel,
  1456  			stale:   make(chan struct{}),
  1457  			hashes:  hashes,
  1458  			task:    s.healer,
  1459  		}
  1460  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1461  			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
  1462  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1463  			s.scheduleRevertBytecodeHealRequest(req)
  1464  		})
  1465  		s.bytecodeHealReqs[reqid] = req
  1466  		delete(s.bytecodeHealIdlers, idle)
  1467  
  1468  		s.pend.Add(1)
  1469  		go func() {
  1470  			defer s.pend.Done()
  1471  
  1472  			// Attempt to send the remote request and revert if it fails
  1473  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1474  				logger.Debug("Failed to request bytecode healers", "err", err)
  1475  				s.scheduleRevertBytecodeHealRequest(req)
  1476  			}
  1477  		}()
  1478  	}
  1479  }
  1480  
  1481  // revertRequests locates all the currently pending requests from a particular
  1482  // peer and reverts them, rescheduling them for others to fulfill.
  1483  func (s *Syncer) revertRequests(peer string) {
  1484  	// Gather the requests first; reverting them needs the lock too
  1485  	s.lock.Lock()
  1486  	var accountReqs []*accountRequest
  1487  	for _, req := range s.accountReqs {
  1488  		if req.peer == peer {
  1489  			accountReqs = append(accountReqs, req)
  1490  		}
  1491  	}
  1492  	var bytecodeReqs []*bytecodeRequest
  1493  	for _, req := range s.bytecodeReqs {
  1494  		if req.peer == peer {
  1495  			bytecodeReqs = append(bytecodeReqs, req)
  1496  		}
  1497  	}
  1498  	var storageReqs []*storageRequest
  1499  	for _, req := range s.storageReqs {
  1500  		if req.peer == peer {
  1501  			storageReqs = append(storageReqs, req)
  1502  		}
  1503  	}
  1504  	var trienodeHealReqs []*trienodeHealRequest
  1505  	for _, req := range s.trienodeHealReqs {
  1506  		if req.peer == peer {
  1507  			trienodeHealReqs = append(trienodeHealReqs, req)
  1508  		}
  1509  	}
  1510  	var bytecodeHealReqs []*bytecodeHealRequest
  1511  	for _, req := range s.bytecodeHealReqs {
  1512  		if req.peer == peer {
  1513  			bytecodeHealReqs = append(bytecodeHealReqs, req)
  1514  		}
  1515  	}
  1516  	s.lock.Unlock()
  1517  
  1518  	// Revert all the requests matching the peer
  1519  	for _, req := range accountReqs {
  1520  		s.revertAccountRequest(req)
  1521  	}
  1522  	for _, req := range bytecodeReqs {
  1523  		s.revertBytecodeRequest(req)
  1524  	}
  1525  	for _, req := range storageReqs {
  1526  		s.revertStorageRequest(req)
  1527  	}
  1528  	for _, req := range trienodeHealReqs {
  1529  		s.revertTrienodeHealRequest(req)
  1530  	}
  1531  	for _, req := range bytecodeHealReqs {
  1532  		s.revertBytecodeHealRequest(req)
  1533  	}
  1534  }
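
// revertRequests must not hold s.lock while calling the revert* methods,
// because those re-acquire the same lock and Go mutexes are not reentrant.
// A minimal sketch of the gather-then-act pattern (illustrative only,
// specialized to account requests; mu stands in for s.lock):
func revertPeerAccountReqs(mu *sync.Mutex, reqs map[uint64]*accountRequest, peer string, revert func(*accountRequest)) {
	mu.Lock()
	var matched []*accountRequest
	for _, req := range reqs {
		if req.peer == peer {
			matched = append(matched, req)
		}
	}
	mu.Unlock() // release before reverting: revert re-locks internally
	for _, req := range matched {
		revert(req)
	}
}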
  1535  
  1536  // scheduleRevertAccountRequest asks the event loop to clean up an account range
  1537  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1538  func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
  1539  	select {
  1540  	case req.revert <- req:
  1541  		// Sync event loop notified
  1542  	case <-req.cancel:
  1543  		// Sync cycle got cancelled
  1544  	case <-req.stale:
  1545  		// Request already reverted
  1546  	}
  1547  }
  1548  
  1549  // revertAccountRequest cleans up an account range request and returns all failed
  1550  // retrieval tasks to the scheduler for reassignment.
  1551  //
  1552  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1553  // On peer threads, use scheduleRevertAccountRequest.
  1554  func (s *Syncer) revertAccountRequest(req *accountRequest) {
  1555  	logger.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
  1556  	select {
  1557  	case <-req.stale:
  1558  		logger.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
  1559  		return
  1560  	default:
  1561  	}
  1562  	close(req.stale)
  1563  
  1564  	// Remove the request from the tracked set
  1565  	s.lock.Lock()
  1566  	delete(s.accountReqs, req.id)
  1567  	s.lock.Unlock()
  1568  
  1569  	// If there's a timeout timer still running, abort it and mark the account
  1570  	// task as not-pending, ready for rescheduling
  1571  	req.timeout.Stop()
  1572  	if req.task.req == req {
  1573  		req.task.req = nil
  1574  	}
  1575  }
  1576  
  1577  // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
  1578  // and return all failed retrieval tasks to the scheduler for reassignment.
  1579  func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
  1580  	select {
  1581  	case req.revert <- req:
  1582  		// Sync event loop notified
  1583  	case <-req.cancel:
  1584  		// Sync cycle got cancelled
  1585  	case <-req.stale:
  1586  		// Request already reverted
  1587  	}
  1588  }
  1589  
  1590  // revertBytecodeRequest cleans up a bytecode request and returns all failed
  1591  // retrieval tasks to the scheduler for reassignment.
  1592  //
  1593  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1594  // On peer threads, use scheduleRevertBytecodeRequest.
  1595  func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
  1596  	logger.Debug("Reverting bytecode request", "peer", req.peer)
  1597  	select {
  1598  	case <-req.stale:
  1599  		logger.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id)
  1600  		return
  1601  	default:
  1602  	}
  1603  	close(req.stale)
  1604  
  1605  	// Remove the request from the tracked set
  1606  	s.lock.Lock()
  1607  	delete(s.bytecodeReqs, req.id)
  1608  	s.lock.Unlock()
  1609  
  1610  	// If there's a timeout timer still running, abort it and mark the code
  1611  	// retrievals as not-pending, ready for rescheduling
  1612  	req.timeout.Stop()
  1613  	for _, hash := range req.hashes {
  1614  		req.task.codeTasks[hash] = struct{}{}
  1615  	}
  1616  }
  1617  
  1618  // scheduleRevertStorageRequest asks the event loop to clean up a storage range
  1619  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1620  func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
  1621  	select {
  1622  	case req.revert <- req:
  1623  		// Sync event loop notified
  1624  	case <-req.cancel:
  1625  		// Sync cycle got cancelled
  1626  	case <-req.stale:
  1627  		// Request already reverted
  1628  	}
  1629  }
  1630  
  1631  // revertStorageRequest cleans up a storage range request and returns all failed
  1632  // retrieval tasks to the scheduler for reassignment.
  1633  //
  1634  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1635  // On peer threads, use scheduleRevertStorageRequest.
  1636  func (s *Syncer) revertStorageRequest(req *storageRequest) {
  1637  	logger.Debug("Reverting storage request", "peer", req.peer)
  1638  	select {
  1639  	case <-req.stale:
  1640  		logger.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id)
  1641  		return
  1642  	default:
  1643  	}
  1644  	close(req.stale)
  1645  
  1646  	// Remove the request from the tracked set
  1647  	s.lock.Lock()
  1648  	delete(s.storageReqs, req.id)
  1649  	s.lock.Unlock()
  1650  
  1651  	// If there's a timeout timer still running, abort it and mark the storage
  1652  	// task as not-pending, ready for rescheduling
  1653  	req.timeout.Stop()
  1654  	if req.subTask != nil {
  1655  		req.subTask.req = nil
  1656  	} else {
  1657  		for i, account := range req.accounts {
  1658  			req.mainTask.stateTasks[account] = req.roots[i]
  1659  		}
  1660  	}
  1661  }
  1662  
  1663  // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal
  1664  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1665  func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
  1666  	select {
  1667  	case req.revert <- req:
  1668  		// Sync event loop notified
  1669  	case <-req.cancel:
  1670  		// Sync cycle got cancelled
  1671  	case <-req.stale:
  1672  		// Request already reverted
  1673  	}
  1674  }
  1675  
  1676  // revertTrienodeHealRequest cleans up a trienode heal request and returns all
  1677  // failed retrieval tasks to the scheduler for reassignment.
  1678  //
  1679  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1680  // On peer threads, use scheduleRevertTrienodeHealRequest.
  1681  func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
  1682  	logger.Debug("Reverting trienode heal request", "peer", req.peer)
  1683  	select {
  1684  	case <-req.stale:
  1685  		logger.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1686  		return
  1687  	default:
  1688  	}
  1689  	close(req.stale)
  1690  
  1691  	// Remove the request from the tracked set
  1692  	s.lock.Lock()
  1693  	delete(s.trienodeHealReqs, req.id)
  1694  	s.lock.Unlock()
  1695  
  1696  	// If there's a timeout timer still running, abort it and mark the trie node
  1697  	// retrievals as not-pending, ready for rescheduling
  1698  	req.timeout.Stop()
  1699  	for i, hash := range req.hashes {
  1700  		req.task.trieTasks[hash] = req.paths[i]
  1701  	}
  1702  }
  1703  
  1704  // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
  1705  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1706  func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
  1707  	select {
  1708  	case req.revert <- req:
  1709  		// Sync event loop notified
  1710  	case <-req.cancel:
  1711  		// Sync cycle got cancelled
  1712  	case <-req.stale:
  1713  		// Request already reverted
  1714  	}
  1715  }
  1716  
  1717  // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
  1718  // failed retrieval tasks to the scheduler for reassignment.
  1719  //
  1720  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1721  // On peer threads, use scheduleRevertBytecodeHealRequest.
  1722  func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
  1723  	logger.Debug("Reverting bytecode heal request", "peer", req.peer)
  1724  	select {
  1725  	case <-req.stale:
  1726  		logger.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1727  		return
  1728  	default:
  1729  	}
  1730  	close(req.stale)
  1731  
  1732  	// Remove the request from the tracked set
  1733  	s.lock.Lock()
  1734  	delete(s.bytecodeHealReqs, req.id)
  1735  	s.lock.Unlock()
  1736  
  1737  	// If there's a timeout timer still running, abort it and mark the code
  1738  	// retrievals as not-pending, ready for rescheduling
  1739  	req.timeout.Stop()
  1740  	for _, hash := range req.hashes {
  1741  		req.task.codeTasks[hash] = struct{}{}
  1742  	}
  1743  }
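
// Every revert* method above guards close(req.stale) with the same
// non-blocking receive, turning a second revert of one request into a no-op
// instead of a double-close panic. This is safe only because reverts run on
// the single event-loop thread, as the method comments note. A minimal
// sketch (illustrative only):
func markStale(stale chan struct{}) bool {
	select {
	case <-stale:
		return false // already reverted earlier
	default:
	}
	close(stale) // sole closer, guaranteed by the event loop
	return true
}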
  1744  
  1745  // processAccountResponse integrates an already validated account range response
  1746  // into the account tasks.
  1747  func (s *Syncer) processAccountResponse(res *accountResponse) {
  1748  	// Switch the task from pending to filling
  1749  	res.task.req = nil
  1750  	res.task.res = res
  1751  
  1752  	// Ensure that the response doesn't overflow into the subsequent task
  1753  	last := res.task.Last.Big()
  1754  	for i, hash := range res.hashes {
  1755  		// Mark the range complete if the last hash is already included.
  1756  		// Keep iterating to delete any extra states beyond it.
  1757  		cmp := hash.Big().Cmp(last)
  1758  		if cmp == 0 {
  1759  			res.cont = false
  1760  			continue
  1761  		}
  1762  		if cmp > 0 {
  1763  			// Chunk overflown, cut off excess
  1764  			res.hashes = res.hashes[:i]
  1765  			res.accounts = res.accounts[:i]
  1766  			res.cont = false // Mark range completed
  1767  			break
  1768  		}
  1769  	}
  1770  	// Iterate over all the accounts and assemble which ones need further sub-
  1771  	// filling before the entire account range can be persisted.
  1772  	res.task.needCode = make([]bool, len(res.accounts))
  1773  	res.task.needState = make([]bool, len(res.accounts))
  1774  	res.task.needHeal = make([]bool, len(res.accounts))
  1775  
  1776  	res.task.codeTasks = make(map[common.Hash]struct{})
  1777  	res.task.stateTasks = make(map[common.Hash]common.Hash)
  1778  
  1779  	resumed := make(map[common.Hash]struct{})
  1780  
  1781  	res.task.pend = 0
  1782  	for i, acc := range res.accounts {
  1783  		pacc := account.GetProgramAccount(acc)
  1784  		// Check if the account is a contract with an unknown code
  1785  		if pacc != nil && !bytes.Equal(pacc.GetCodeHash(), emptyCode[:]) {
  1786  			if !s.db.HasCodeWithPrefix(common.BytesToHash(pacc.GetCodeHash())) {
  1787  				res.task.codeTasks[common.BytesToHash(pacc.GetCodeHash())] = struct{}{}
  1788  				res.task.needCode[i] = true
  1789  				res.task.pend++
  1790  			}
  1791  		}
  1792  		// Check if the account is a contract with an unknown storage trie
  1793  		if pacc != nil && pacc.GetStorageRoot() != emptyRoot {
  1794  			if ok, err := s.db.HasStateTrieNode(pacc.GetStorageRoot().Bytes()); err != nil || !ok {
  1795  				// If there was a previous large state retrieval in progress,
  1796  				// don't restart it from scratch. This happens if a sync cycle
  1797  				// is interrupted and resumed later. However, *do* update the
  1798  				// previous root hash.
  1799  				if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
  1800  					logger.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", pacc.GetStorageRoot())
  1801  					for _, subtask := range subtasks {
  1802  						subtask.root = pacc.GetStorageRoot()
  1803  					}
  1804  					res.task.needHeal[i] = true
  1805  					resumed[res.hashes[i]] = struct{}{}
  1806  				} else {
  1807  					res.task.stateTasks[res.hashes[i]] = pacc.GetStorageRoot()
  1808  				}
  1809  				res.task.needState[i] = true
  1810  				res.task.pend++
  1811  			}
  1812  		}
  1813  	}
  1814  	// Delete any subtasks that have been aborted but not resumed. This may undo
  1815  	// some progress if a new peer gives us fewer accounts than an old one, but for
  1816  	// now we have to live with that.
  1817  	for hash := range res.task.SubTasks {
  1818  		if _, ok := resumed[hash]; !ok {
  1819  			logger.Debug("Aborting suspended storage retrieval", "account", hash)
  1820  			delete(res.task.SubTasks, hash)
  1821  		}
  1822  	}
  1823  	// If the account range contained no contracts, or all have been fully filled
  1824  	// beforehand, short circuit storage filling and forward to the next task
  1825  	if res.task.pend == 0 {
  1826  		s.forwardAccountTask(res.task)
  1827  		return
  1828  	}
  1829  	// Some accounts are incomplete, leave as is for the storage and contract
  1830  	// task assigners to pick up and fill.
  1831  }
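
// The overflow check at the top of processAccountResponse compares hashes as
// 256-bit integers against the task's Last bound: an exact hit marks the
// range complete, and anything beyond it is cut off. A minimal sketch of the
// same trimming (illustrative only):
func trimToBound(hashes []common.Hash, last common.Hash) (kept []common.Hash, cont bool) {
	cont = true
	bound := last.Big()
	for i, hash := range hashes {
		cmp := hash.Big().Cmp(bound)
		if cmp == 0 {
			cont = false // the bound itself was delivered: range complete
		}
		if cmp > 0 {
			return hashes[:i], false // overflow into the next task: cut the excess
		}
	}
	return hashes, cont
}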
  1832  
  1833  // processBytecodeResponse integrates an already validated bytecode response
  1834  // into the account tasks.
  1835  func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
  1836  	batch := s.db.NewBatch(database.StateTrieDB)
  1837  
  1838  	var codes uint64
  1839  	for i, hash := range res.hashes {
  1840  		code := res.codes[i]
  1841  
  1842  		// If the bytecode was not delivered, reschedule it
  1843  		if code == nil {
  1844  			res.task.codeTasks[hash] = struct{}{}
  1845  			continue
  1846  		}
  1847  		// Code was delivered, mark it not needed any more
  1848  		for j, acc := range res.task.res.accounts {
  1849  			pacc := account.GetProgramAccount(acc)
  1850  			if pacc != nil && res.task.needCode[j] && hash == common.BytesToHash(pacc.GetCodeHash()) {
  1851  				res.task.needCode[j] = false
  1852  				res.task.pend--
  1853  			}
  1854  		}
  1855  		// Push the bytecode into a database batch
  1856  		codes++
  1857  		if err := batch.Put(database.CodeKey(hash), code); err != nil {
  1858  			logger.Crit("Failed to store contract code", "err", err)
  1859  		}
  1860  	}
  1861  	bytes := common.StorageSize(batch.ValueSize())
  1862  	if err := batch.Write(); err != nil {
  1863  		logger.Crit("Failed to persist bytecodes", "err", err)
  1864  	}
  1865  	s.bytecodeSynced += codes
  1866  	s.bytecodeBytes += bytes
  1867  
  1868  	logger.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
  1869  
  1870  	// If this delivery completed the last pending task, forward the account task
  1871  	// to the next chunk
  1872  	if res.task.pend == 0 {
  1873  		s.forwardAccountTask(res.task)
  1874  		return
  1875  	}
  1876  	// Some accounts are still incomplete, leave as is for the storage and contract
  1877  	// task assigners to pick up and fill.
  1878  }
  1879  
  1880  // processStorageResponse integrates an already validated storage response
  1881  // into the account tasks.
  1882  func (s *Syncer) processStorageResponse(res *storageResponse) {
  1883  	// Switch the subtask from pending to idle
  1884  	if res.subTask != nil {
  1885  		res.subTask.req = nil
  1886  	}
  1887  	batch := s.db.NewSnapshotDBBatch()
  1888  	var (
  1889  		slots           int
  1890  		oldStorageBytes = s.storageBytes
  1891  	)
  1892  	// Iterate over all the accounts and reconstruct their storage tries from the
  1893  	// delivered slots
  1894  	for i, accountHash := range res.accounts {
  1895  		// If the account was not delivered, reschedule it
  1896  		if i >= len(res.hashes) {
  1897  			res.mainTask.stateTasks[accountHash] = res.roots[i]
  1898  			continue
  1899  		}
  1900  		// State was delivered, if complete mark as not needed any more, otherwise
  1901  		// mark the account as needing healing
  1902  		for j, hash := range res.mainTask.res.hashes {
  1903  			if accountHash != hash {
  1904  				continue
  1905  			}
  1906  			pacc := account.GetProgramAccount(res.mainTask.res.accounts[j])
  1907  			if pacc == nil {
  1908  				continue
  1909  			}
  1910  
  1911  			// If the packet contains multiple contract storage slots, all
  1912  			// but the last are surely complete. The last contract may be
  1913  			// chunked, so check its continuation flag.
  1914  			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
  1915  				res.mainTask.needState[j] = false
  1916  				res.mainTask.pend--
  1917  			}
  1918  			// If the last contract was chunked, mark it as needing healing
  1919  			// to avoid writing it out to disk prematurely.
  1920  			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
  1921  				res.mainTask.needHeal[j] = true
  1922  			}
  1923  			// If the last contract was chunked, we need to switch to large
  1924  			// contract handling mode
  1925  			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
  1926  				// If we haven't yet started a large-contract retrieval, create
  1927  				// the subtasks for it within the main account task
  1928  				if tasks, ok := res.mainTask.SubTasks[accountHash]; !ok {
  1929  					var (
  1930  						keys    = res.hashes[i]
  1931  						chunks  = uint64(storageConcurrency)
  1932  						lastKey common.Hash
  1933  					)
  1934  					if len(keys) > 0 {
  1935  						lastKey = keys[len(keys)-1]
  1936  					}
  1937  					// If the number of slots remaining is low, decrease the
  1938  					// number of chunks. Somewhere on the order of 10-15K slots
  1939  					// fit into a packet of 500KB. A key/slot pair is maximum 64
  1940  					// bytes, so pessimistically maxRequestSize/64 = 8K.
  1941  					//
  1942  					// Chunk so that at least 2 packets are needed to fill a task.
  1943  					if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil {
  1944  						if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks {
  1945  							chunks = n + 1
  1946  						}
  1947  						logger.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks)
  1948  					} else {
  1949  						logger.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks)
  1950  					}
  1951  					r := newHashRange(lastKey, chunks)
  1952  
  1953  					// Our first task is the one that was just filled by this response.
  1954  					db := statedb.NewDatabase(s.db)
  1955  					trie, _ := statedb.NewTrie(common.Hash{}, db)
  1956  					tasks = append(tasks, &storageTask{
  1957  						Next:    common.Hash{},
  1958  						Last:    r.End(),
  1959  						root:    pacc.GetStorageRoot(),
  1960  						genTrie: trie,
  1961  						trieDb:  db,
  1962  					})
  1963  					for r.Next() {
  1964  						db := statedb.NewDatabase(s.db)
  1965  						trie, _ := statedb.NewTrie(common.Hash{}, db)
  1966  						tasks = append(tasks, &storageTask{
  1967  							Next:    r.Start(),
  1968  							Last:    r.End(),
  1969  							root:    pacc.GetStorageRoot(),
  1970  							genTrie: trie,
  1971  							trieDb:  db,
  1972  						})
  1973  					}
  1974  					for _, task := range tasks {
  1975  						logger.Debug("Created storage sync task", "account", accountHash, "root", pacc.GetStorageRoot(), "from", task.Next, "last", task.Last)
  1976  					}
  1977  					res.mainTask.SubTasks[accountHash] = tasks
  1978  
  1979  					// Since we've just created the sub-tasks, this response
  1980  					// is surely for the first one (zero origin)
  1981  					res.subTask = tasks[0]
  1982  				}
  1983  			}
  1984  			// If we're in large contract delivery mode, forward the subtask
  1985  			if res.subTask != nil {
  1986  				// Ensure the response doesn't overflow into the subsequent task
  1987  				last := res.subTask.Last.Big()
  1988  				// Find the first overflowing key. While at it, mark res as complete
  1989  				// if we find the range to include or pass the 'last'
  1990  				index := sort.Search(len(res.hashes[i]), func(k int) bool {
  1991  					cmp := res.hashes[i][k].Big().Cmp(last)
  1992  					if cmp >= 0 {
  1993  						res.cont = false
  1994  					}
  1995  					return cmp > 0
  1996  				})
  1997  				if index >= 0 {
  1998  					// cut off excess
  1999  					res.hashes[i] = res.hashes[i][:index]
  2000  					res.slots[i] = res.slots[i][:index]
  2001  				}
  2002  				// Forward the relevant storage chunk (even if created just now)
  2003  				if res.cont {
  2004  					res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1])
  2005  				} else {
  2006  					res.subTask.done = true
  2007  				}
  2008  			}
  2009  		}
  2010  		// Iterate over all the complete contracts, reconstruct the trie nodes and
  2011  		// push them to disk. If the contract is chunked, the trie nodes will be
  2012  		// reconstructed later.
  2013  		slots += len(res.hashes[i])
  2014  
  2015  		if i < len(res.hashes)-1 || res.subTask == nil {
  2016  			db := statedb.NewDatabase(s.db)
  2017  			tr, _ := statedb.NewTrie(common.Hash{}, db)
  2018  			for j := 0; j < len(res.hashes[i]); j++ {
  2019  				tr.Update(res.hashes[i][j][:], res.slots[i][j])
  2020  			}
  2021  			root, _ := tr.Commit(nil)
  2022  			_, nodeSize, _ := db.Size()
  2023  			if err := db.Commit(root, false, 0); err != nil {
  2024  				logger.Error("Failed to persist storage slots", "err", err)
  2025  			} else {
  2026  				s.storageBytes += nodeSize
  2027  			}
  2028  		}
  2029  		// Persist the received storage segments. This flat state may be
  2030  		// outdated during the sync, but it can be fixed later during
  2031  		// snapshot generation.
  2032  		for j := 0; j < len(res.hashes[i]); j++ {
  2033  			batch.WriteStorageSnapshot(accountHash, res.hashes[i][j], res.slots[i][j])
  2034  			s.storageBytes += common.StorageSize(len(database.StorageSnapshotKey(accountHash, res.hashes[i][j])) + len(res.slots[i][j]))
  2035  
  2036  			// If we're storing large contracts, generate the trie nodes
  2037  			// on the fly to not trash the gluing points
  2038  			if i == len(res.hashes)-1 && res.subTask != nil {
  2039  				res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
  2040  			}
  2041  		}
  2042  	}
  2043  	// Large contracts could have generated new trie nodes, flush them to disk
  2044  	if res.subTask != nil {
  2045  		if res.subTask.done {
  2046  			root, _ := res.subTask.genTrie.Commit(nil)
  2047  			_, nodeSize, _ := res.subTask.trieDb.Size()
  2048  
  2049  			if err := res.subTask.trieDb.Commit(root, false, 0); err != nil {
  2050  				logger.Error("Failed to persist storage slots", "root", root, "err", err)
  2051  			} else if root == res.subTask.root {
  2052  				s.storageBytes += nodeSize
  2053  				// If the chunk overflowed but was in fact a full delivery, clear the heal request
  2054  				for i, account := range res.mainTask.res.hashes {
  2055  					if account == res.accounts[len(res.accounts)-1] {
  2056  						res.mainTask.needHeal[i] = false
  2057  					}
  2058  				}
  2059  			}
  2060  		}
  2061  	}
  2062  	// Flush anything written just now and update the stats
  2063  	if err := batch.Write(); err != nil {
  2064  		logger.Crit("Failed to persist storage slots", "err", err)
  2065  	}
  2066  	s.storageSynced += uint64(slots)
  2067  
  2068  	logger.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes)
  2069  
  2070  	// If this delivery completed the last pending task, forward the account task
  2071  	// to the next chunk
  2072  	if res.mainTask.pend == 0 {
  2073  		s.forwardAccountTask(res.mainTask)
  2074  		return
  2075  	}
  2076  	// Some accounts are still incomplete, leave as is for the storage and contract
  2077  	// task assigners to pick up and fill.
  2078  }
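
// The chunk-count heuristic above aims for at least two full packets per
// subtask: with key/slot pairs pessimistically at 64 bytes, one packet holds
// about maxRequestSize/64 = 8K slots. A minimal sketch of the arithmetic
// (illustrative only; storageConcurrency is this package's upper bound):
func chunksForEstimate(estimate uint64) uint64 {
	chunks := uint64(storageConcurrency)
	perChunk := uint64(2 * (maxRequestSize / 64)) // two packets, ~16K slots
	if n := estimate / perChunk; n+1 < chunks {
		chunks = n + 1 // few slots remain: fewer, smaller chunks
	}
	return chunks
}
// For example, an estimate of 100,000 remaining slots gives
// n = 100000/16384 = 6, i.e. 7 chunks, provided that stays below the
// concurrency cap.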
  2079  
  2080  // processTrienodeHealResponse integrates an already validated trienode response
  2081  // into the healer tasks.
  2082  func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
  2083  	for i, hash := range res.hashes {
  2084  		node := res.nodes[i]
  2085  
  2086  		// If the trie node was not delivered, reschedule it
  2087  		if node == nil {
  2088  			res.task.trieTasks[hash] = res.paths[i]
  2089  			continue
  2090  		}
  2091  		// Push the trie node into the state syncer
  2092  		s.trienodeHealSynced++
  2093  		s.trienodeHealBytes += common.StorageSize(len(node))
  2094  
  2095  		err := s.healer.scheduler.Process(statedb.SyncResult{Hash: hash, Data: node})
  2096  		switch err {
  2097  		case nil:
  2098  		case statedb.ErrAlreadyProcessed:
  2099  			s.trienodeHealDups++
  2100  		case statedb.ErrNotRequested:
  2101  			s.trienodeHealNops++
  2102  		default:
  2103  			logger.Error("Invalid trienode processed", "hash", hash, "err", err)
  2104  		}
  2105  	}
  2106  	batch := s.db.NewBatch(database.StateTrieDB)
  2107  	if _, err := s.healer.scheduler.Commit(batch); err != nil {
  2108  		logger.Error("Failed to commit healing data", "err", err)
  2109  	}
  2110  	if err := batch.Write(); err != nil {
  2111  		logger.Crit("Failed to persist healing data", "err", err)
  2112  	}
  2113  	logger.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
  2114  }
  2115  
  2116  // processBytecodeHealResponse integrates an already validated bytecode response
  2117  // into the healer tasks.
  2118  func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
  2119  	for i, hash := range res.hashes {
  2120  		node := res.codes[i]
  2121  
  2122  		// If the bytecode was not delivered, reschedule it
  2123  		if node == nil {
  2124  			res.task.codeTasks[hash] = struct{}{}
  2125  			continue
  2126  		}
  2127  		// Push the bytecode into the state syncer
  2128  		s.bytecodeHealSynced++
  2129  		s.bytecodeHealBytes += common.StorageSize(len(node))
  2130  
  2131  		err := s.healer.scheduler.Process(statedb.SyncResult{Hash: hash, Data: node})
  2132  		switch err {
  2133  		case nil:
  2134  		case statedb.ErrAlreadyProcessed:
  2135  			s.bytecodeHealDups++
  2136  		case statedb.ErrNotRequested:
  2137  			s.bytecodeHealNops++
  2138  		default:
  2139  			logger.Error("Invalid bytecode processed", "hash", hash, "err", err)
  2140  		}
  2141  	}
  2142  	batch := s.db.NewBatch(database.StateTrieDB)
  2143  	if _, err := s.healer.scheduler.Commit(batch); err != nil {
  2144  		logger.Error("Failed to commit healing data", "err", err)
  2145  	}
  2146  	if err := batch.Write(); err != nil {
  2147  		logger.Crit("Failed to persist healing data", "err", err)
  2148  	}
  2149  	logger.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
  2150  }
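
// Both heal processors feed results through the scheduler and tolerate two
// benign outcomes: already-processed items (duplicate deliveries from
// overlapping requests) and never-requested ones. A minimal sketch of the
// triage (illustrative only):
func classifyHealError(err error) (dup, nop, fatal bool) {
	switch err {
	case nil:
	case statedb.ErrAlreadyProcessed:
		dup = true // counted, not fatal: overlap is expected
	case statedb.ErrNotRequested:
		nop = true // counted, not fatal: stale scheduling
	default:
		fatal = true // genuinely invalid data
	}
	return
}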
  2151  
  2152  // forwardAccountTask takes a filled account task and persists anything available
  2153  // into the database, after which it forwards the next account marker so that the
  2154  // task's next chunk may be filled.
  2155  func (s *Syncer) forwardAccountTask(task *accountTask) {
  2156  	// Remove any pending delivery
  2157  	res := task.res
  2158  	if res == nil {
  2159  		return // nothing to forward
  2160  	}
  2161  	task.res = nil
  2162  
  2163  	// Persist the received account segments. This flat state may be
  2164  	// outdated during the sync, but it can be fixed later during
  2165  	// snapshot generation.
  2166  	oldAccountBytes := s.accountBytes
  2167  
  2168  	batch := s.db.NewSnapshotDBBatch()
  2169  	for i, hash := range res.hashes {
  2170  		if task.needCode[i] || task.needState[i] {
  2171  			break
  2172  		}
  2173  		serializer := account.NewAccountSerializerWithAccount(res.accounts[i])
  2174  		bytes, err := rlp.EncodeToBytes(serializer)
  2175  		if err != nil {
  2176  			logger.Error("Failed to encode account", "err", err)
  2177  		}
  2178  		batch.WriteAccountSnapshot(hash, bytes)
  2179  		s.accountBytes += common.StorageSize(len(database.AccountSnapshotKey(hash)) + len(bytes))
  2180  
  2181  		// If the task is complete, drop it into the trie to generate
  2182  		// account trie nodes for it
  2183  		if !task.needHeal[i] {
  2184  			task.genTrie.Update(hash[:], bytes)
  2185  		}
  2186  	}
  2187  	// Flush anything written just now and update the stats
  2188  	if err := batch.Write(); err != nil {
  2189  		logger.Crit("Failed to persist accounts", "err", err)
  2190  	}
  2191  	s.accountSynced += uint64(len(res.accounts))
  2192  
  2193  	// Task filling persisted, push the chunk marker forward to the first
  2194  	// account still missing data.
  2195  	for i, hash := range res.hashes {
  2196  		if task.needCode[i] || task.needState[i] {
  2197  			return
  2198  		}
  2199  		task.Next = incHash(hash)
  2200  	}
  2201  	// All accounts marked as complete, track if the entire task is done
  2202  	task.done = !res.cont
  2203  
  2204  	// The trie could have generated trie nodes, push them to disk (we need to
  2205  	// flush after finalizing task.done). It's fine even if we crash and lose this
  2206  	// write, as it will only cause more data to be downloaded during healing.
  2207  	if task.done {
  2208  		root, _ := task.genTrie.Commit(nil)
  2209  		_, nodeSize, _ := task.trieDb.Size()
  2210  
  2211  		if err := task.trieDb.Commit(root, false, 0); err != nil {
  2212  			logger.Error("Failed to persist account slots", "root", root.String(), "err", err)
  2213  		} else {
  2214  			s.accountBytes += nodeSize
  2215  		}
  2216  	}
  2217  	logger.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes)
  2218  }
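
// Forwarding the marker relies on incHash (defined elsewhere in this
// package) to step one past the last persisted hash. A minimal sketch of
// such an increment as 256-bit integer arithmetic (illustrative only; the
// real helper may differ in edge-case handling):
func incHashSketch(h common.Hash) common.Hash {
	next := new(big.Int).Add(h.Big(), big.NewInt(1))
	return common.BytesToHash(next.Bytes()) // keeps the low 32 bytes, wrapping max+1 to zero
}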
  2219  
  2220  // OnAccounts is a callback method to invoke when a range of accounts is
  2221  // received from a remote peer.
  2222  func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
  2223  	size := common.StorageSize(len(hashes) * common.HashLength)
  2224  	for _, account := range accounts {
  2225  		size += common.StorageSize(len(account))
  2226  	}
  2227  	for _, node := range proof {
  2228  		size += common.StorageSize(len(node))
  2229  	}
  2230  	logger := peer.Log().NewWith("reqid", id)
  2231  	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)
  2232  
  2233  	// Whether or not the response is valid, we can mark the peer as idle and
  2234  	// notify the scheduler to assign a new task. If the response is invalid,
  2235  	// we'll drop the peer in a bit.
  2236  	s.lock.Lock()
  2237  	if _, ok := s.peers[peer.ID()]; ok {
  2238  		s.accountIdlers[peer.ID()] = struct{}{}
  2239  	}
  2240  	select {
  2241  	case s.update <- struct{}{}:
  2242  	default:
  2243  	}
  2244  	// Ensure the response is for a valid request
  2245  	req, ok := s.accountReqs[id]
  2246  	if !ok {
  2247  		// Request stale, perhaps the peer timed out but came through in the end
  2248  		logger.Warn("Unexpected account range packet")
  2249  		s.lock.Unlock()
  2250  		return nil
  2251  	}
  2252  	delete(s.accountReqs, id)
  2253  	s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
  2254  
  2255  	// Clean up the request timeout timer, we'll see how to proceed further based
  2256  	// on the actual delivered content
  2257  	if !req.timeout.Stop() {
  2258  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2259  		s.lock.Unlock()
  2260  		return nil
  2261  	}
  2262  	// Response is valid, but check if peer is signalling that it does not have
  2263  	// the requested data. For account range queries that means the state being
  2264  	// retrieved was either already pruned remotely, or the peer is not yet
  2265  	// synced to our head.
  2266  	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
  2267  		logger.Debug("Peer rejected account range request", "root", s.root)
  2268  		s.statelessPeers[peer.ID()] = struct{}{}
  2269  		s.lock.Unlock()
  2270  
  2271  		// Signal this request as failed, and ready for rescheduling
  2272  		s.scheduleRevertAccountRequest(req)
  2273  		return nil
  2274  	}
  2275  	root := s.root
  2276  	s.lock.Unlock()
  2277  
  2278  	// Reconstruct a partial trie from the response and verify it
  2279  	keys := make([][]byte, len(hashes))
  2280  	for i, key := range hashes {
  2281  		keys[i] = common.CopyBytes(key[:])
  2282  	}
  2283  	nodes := make(NodeList, len(proof))
  2284  	for i, node := range proof {
  2285  		nodes[i] = node
  2286  	}
  2287  	proofdb := nodes.NodeSet()
  2288  
  2289  	var end []byte
  2290  	if len(keys) > 0 {
  2291  		end = keys[len(keys)-1]
  2292  	}
  2293  	cont, err := statedb.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
  2294  	if err != nil {
  2295  		logger.Warn("Account range failed proof", "err", err)
  2296  		// Signal this request as failed, and ready for rescheduling
  2297  		s.scheduleRevertAccountRequest(req)
  2298  		return err
  2299  	}
  2300  	accs := make([]account.Account, len(accounts))
  2301  	for i, accBytes := range accounts {
  2302  		serializer := account.NewAccountSerializer()
  2303  		if err := rlp.DecodeBytes(accBytes, serializer); err != nil {
  2304  			panic(err) // We created these blobs, we must be able to decode them
  2305  		}
  2306  		accs[i] = serializer.GetAccount()
  2307  	}
  2308  	response := &accountResponse{
  2309  		task:     req.task,
  2310  		hashes:   hashes,
  2311  		accounts: accs,
  2312  		cont:     cont,
  2313  	}
  2314  	select {
  2315  	case req.deliver <- response:
  2316  	case <-req.cancel:
  2317  	case <-req.stale:
  2318  	}
  2319  	return nil
  2320  }
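
// Every On* handler hands its validated response to the event loop with the
// same three-way select: deliver if the loop is listening, or drop the work
// if the sync cycle was cancelled or the request was reverted meanwhile. A
// minimal sketch (illustrative only, specialized to account responses):
func deliverAccountResponse(req *accountRequest, res *accountResponse) {
	select {
	case req.deliver <- res:
		// Event loop accepted the response
	case <-req.cancel:
		// Sync cycle cancelled while we were validating
	case <-req.stale:
		// Request reverted (e.g. timed out) while we were validating
	}
}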
  2321  
  2322  // OnByteCodes is a callback method to invoke when a batch of contract
  2323  // bytecodes is received from a remote peer.
  2324  func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2325  	s.lock.RLock()
  2326  	syncing := !s.snapped
  2327  	s.lock.RUnlock()
  2328  
  2329  	if syncing {
  2330  		return s.onByteCodes(peer, id, bytecodes)
  2331  	}
  2332  	return s.onHealByteCodes(peer, id, bytecodes)
  2333  }
  2334  
  2335  // onByteCodes is a callback method to invoke when a batch of contract
  2336  // bytecodes is received from a remote peer in the syncing phase.
  2337  func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2338  	var size common.StorageSize
  2339  	for _, code := range bytecodes {
  2340  		size += common.StorageSize(len(code))
  2341  	}
  2342  	logger := peer.Log().NewWith("reqid", id)
  2343  	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2344  
  2345  	// Whether or not the response is valid, we can mark the peer as idle and
  2346  	// notify the scheduler to assign a new task. If the response is invalid,
  2347  	// we'll drop the peer in a bit.
  2348  	s.lock.Lock()
  2349  	if _, ok := s.peers[peer.ID()]; ok {
  2350  		s.bytecodeIdlers[peer.ID()] = struct{}{}
  2351  	}
  2352  	select {
  2353  	case s.update <- struct{}{}:
  2354  	default:
  2355  	}
  2356  	// Ensure the response is for a valid request
  2357  	req, ok := s.bytecodeReqs[id]
  2358  	if !ok {
  2359  		// Request stale, perhaps the peer timed out but came through in the end
  2360  		logger.Warn("Unexpected bytecode packet")
  2361  		s.lock.Unlock()
  2362  		return nil
  2363  	}
  2364  	delete(s.bytecodeReqs, id)
  2365  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2366  
  2367  	// Clean up the request timeout timer, we'll see how to proceed further based
  2368  	// on the actual delivered content
  2369  	if !req.timeout.Stop() {
  2370  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2371  		s.lock.Unlock()
  2372  		return nil
  2373  	}
  2374  
  2375  	// Response is valid, but check if peer is signalling that it does not have
  2376  	// the requested data. For bytecode range queries that means the peer is not
  2377  	// yet synced.
  2378  	if len(bytecodes) == 0 {
  2379  		logger.Debug("Peer rejected bytecode request")
  2380  		s.statelessPeers[peer.ID()] = struct{}{}
  2381  		s.lock.Unlock()
  2382  
  2383  		// Signal this request as failed, and ready for rescheduling
  2384  		s.scheduleRevertBytecodeRequest(req)
  2385  		return nil
  2386  	}
  2387  	s.lock.Unlock()
  2388  
  2389  	// Cross reference the requested bytecodes with the response to find gaps
  2390  	// that the serving node is missing
  2391  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2392  	hash := make([]byte, 32)
  2393  
  2394  	codes := make([][]byte, len(req.hashes))
  2395  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2396  		// Find the next hash that we've been served, leaving misses with nils
  2397  		hasher.Reset()
  2398  		hasher.Write(bytecodes[i])
  2399  		hasher.Read(hash)
  2400  
  2401  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2402  			j++
  2403  		}
  2404  		if j < len(req.hashes) {
  2405  			codes[j] = bytecodes[i]
  2406  			j++
  2407  			continue
  2408  		}
  2409  		// We've either run out of hashes or got unrequested data
  2410  		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
  2411  		// Signal this request as failed, and ready for rescheduling
  2412  		s.scheduleRevertBytecodeRequest(req)
  2413  		return errors.New("unexpected bytecode")
  2414  	}
  2415  	// Response validated, send it to the scheduler for filling
  2416  	response := &bytecodeResponse{
  2417  		task:   req.task,
  2418  		hashes: req.hashes,
  2419  		codes:  codes,
  2420  	}
  2421  	select {
  2422  	case req.deliver <- response:
  2423  	case <-req.cancel:
  2424  	case <-req.stale:
  2425  	}
  2426  	return nil
  2427  }
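
// The cross-referencing loop above exploits the fact that a well-behaved
// peer answers with a subsequence of the requested hashes: one forward scan
// pairs each blob with its request slot, leaving nil gaps for items the peer
// did not have. A minimal sketch of the same join (illustrative only, using
// crypto.Keccak256Hash in place of the manual hasher):
func matchDeliveries(requested []common.Hash, delivered [][]byte) ([][]byte, error) {
	out := make([][]byte, len(requested))
	j := 0
	for _, blob := range delivered {
		hash := crypto.Keccak256Hash(blob)
		for j < len(requested) && requested[j] != hash {
			j++
		}
		if j >= len(requested) {
			// Ran out of hashes: the data was never requested
			return nil, errors.New("unexpected delivery")
		}
		out[j] = blob
		j++
	}
	return out, nil
}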
  2428  
  2429  // OnStorage is a callback method to invoke when ranges of storage slots
  2430  // are received from a remote peer.
  2431  func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
  2432  	// Gather some trace stats to aid in debugging issues
  2433  	var (
  2434  		hashCount int
  2435  		slotCount int
  2436  		size      common.StorageSize
  2437  	)
  2438  	for _, hashset := range hashes {
  2439  		size += common.StorageSize(common.HashLength * len(hashset))
  2440  		hashCount += len(hashset)
  2441  	}
  2442  	for _, slotset := range slots {
  2443  		for _, slot := range slotset {
  2444  			size += common.StorageSize(len(slot))
  2445  		}
  2446  		slotCount += len(slotset)
  2447  	}
  2448  	for _, node := range proof {
  2449  		size += common.StorageSize(len(node))
  2450  	}
  2451  	logger := peer.Log().NewWith("reqid", id)
  2452  	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)
  2453  
  2454  	// Whether or not the response is valid, we can mark the peer as idle and
  2455  	// notify the scheduler to assign a new task. If the response is invalid,
  2456  	// we'll drop the peer in a bit.
  2457  	s.lock.Lock()
  2458  	if _, ok := s.peers[peer.ID()]; ok {
  2459  		s.storageIdlers[peer.ID()] = struct{}{}
  2460  	}
  2461  	select {
  2462  	case s.update <- struct{}{}:
  2463  	default:
  2464  	}
  2465  	// Ensure the response is for a valid request
  2466  	req, ok := s.storageReqs[id]
  2467  	if !ok {
  2468  		// Request stale, perhaps the peer timed out but came through in the end
  2469  		logger.Warn("Unexpected storage ranges packet")
  2470  		s.lock.Unlock()
  2471  		return nil
  2472  	}
  2473  	delete(s.storageReqs, id)
  2474  	s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size))
  2475  
  2476  	// Clean up the request timeout timer, we'll see how to proceed further based
  2477  	// on the actual delivered content
  2478  	if !req.timeout.Stop() {
  2479  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2480  		s.lock.Unlock()
  2481  		return nil
  2482  	}
  2483  
  2484  	// Reject the response if the hash sets and slot sets don't match, or if the
  2485  	// peer sent more data than requested.
  2486  	if len(hashes) != len(slots) {
  2487  		s.lock.Unlock()
  2488  		s.scheduleRevertStorageRequest(req) // reschedule request
  2489  		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
  2490  		return errors.New("hash and slot set size mismatch")
  2491  	}
  2492  	if len(hashes) > len(req.accounts) {
  2493  		s.lock.Unlock()
  2494  		s.scheduleRevertStorageRequest(req) // reschedule request
  2495  		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
  2496  		return errors.New("hash set larger than requested")
  2497  	}
  2498  	// Response is valid, but check if peer is signalling that it does not have
  2499  	// the requested data. For storage range queries that means the state being
  2500  	// retrieved was either already pruned remotely, or the peer is not yet
  2501  	// synced to our head.
  2502  	if len(hashes) == 0 {
  2503  		logger.Debug("Peer rejected storage request")
  2504  		s.statelessPeers[peer.ID()] = struct{}{}
  2505  		s.lock.Unlock()
  2506  		s.scheduleRevertStorageRequest(req) // reschedule request
  2507  		return nil
  2508  	}
  2509  	s.lock.Unlock()
  2510  
  2511  	// Reconstruct the partial tries from the response and verify them
  2512  	var cont bool
  2513  
  2514  	for i := 0; i < len(hashes); i++ {
  2515  		// Convert the keys and proofs into an internal format
  2516  		keys := make([][]byte, len(hashes[i]))
  2517  		for j, key := range hashes[i] {
  2518  			keys[j] = common.CopyBytes(key[:])
  2519  		}
  2520  		nodes := make(NodeList, 0, len(proof))
  2521  		if i == len(hashes)-1 {
  2522  			for _, node := range proof {
  2523  				nodes = append(nodes, node)
  2524  			}
  2525  		}
  2526  		var err error
  2527  		if len(nodes) == 0 {
  2528  			// No proof has been attached, the response must cover the entire key
  2529  			// space and hash to the origin root.
  2530  			_, err = statedb.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
  2531  			if err != nil {
  2532  				s.scheduleRevertStorageRequest(req) // reschedule request
  2533  				logger.Warn("Storage slots failed proof", "err", err)
  2534  				return err
  2535  			}
  2536  		} else {
  2537  			// A proof was attached, the response is only partial, check that the
  2538  			// returned data is indeed part of the storage trie
  2539  			proofdb := nodes.NodeSet()
  2540  
  2541  			var end []byte
  2542  			if len(keys) > 0 {
  2543  				end = keys[len(keys)-1]
  2544  			}
  2545  			cont, err = statedb.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
  2546  			if err != nil {
  2547  				s.scheduleRevertStorageRequest(req) // reschedule request
  2548  				logger.Warn("Storage range failed proof", "err", err)
  2549  				return err
  2550  			}
  2551  		}
  2552  	}
  2553  	// Partial tries reconstructed, send them to the scheduler for storage filling
  2554  	response := &storageResponse{
  2555  		mainTask: req.mainTask,
  2556  		subTask:  req.subTask,
  2557  		accounts: req.accounts,
  2558  		roots:    req.roots,
  2559  		hashes:   hashes,
  2560  		slots:    slots,
  2561  		cont:     cont,
  2562  	}
  2563  	select {
  2564  	case req.deliver <- response:
  2565  	case <-req.cancel:
  2566  	case <-req.stale:
  2567  	}
  2568  	return nil
  2569  }
  2570  
  2571  // OnTrieNodes is a callback method to invoke when a batch of trie nodes
  2572  // is received from a remote peer.
  2573  func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
  2574  	var size common.StorageSize
  2575  	for _, node := range trienodes {
  2576  		size += common.StorageSize(len(node))
  2577  	}
  2578  	logger := peer.Log().NewWith("reqid", id)
  2579  	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)
  2580  
  2581  	// Whether or not the response is valid, we can mark the peer as idle and
  2582  	// notify the scheduler to assign a new task. If the response is invalid,
  2583  	// we'll drop the peer in a bit.
  2584  	s.lock.Lock()
  2585  	if _, ok := s.peers[peer.ID()]; ok {
  2586  		s.trienodeHealIdlers[peer.ID()] = struct{}{}
  2587  	}
  2588  	select {
  2589  	case s.update <- struct{}{}:
  2590  	default:
  2591  	}
  2592  	// Ensure the response is for a valid request
  2593  	req, ok := s.trienodeHealReqs[id]
  2594  	if !ok {
  2595  		// Request stale, perhaps the peer timed out but came through in the end
  2596  		logger.Warn("Unexpected trienode heal packet")
  2597  		s.lock.Unlock()
  2598  		return nil
  2599  	}
  2600  	delete(s.trienodeHealReqs, id)
  2601  	s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
  2602  
  2603  	// Clean up the request timeout timer, we'll see how to proceed further based
  2604  	// on the actual delivered content
  2605  	if !req.timeout.Stop() {
  2606  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2607  		s.lock.Unlock()
  2608  		return nil
  2609  	}
  2610  
  2611  	// Response is valid, but check if peer is signalling that it does not have
  2612  	// the requested data. For trienode heal queries that means the peer is not
  2613  	// yet synced.
  2614  	if len(trienodes) == 0 {
  2615  		logger.Debug("Peer rejected trienode heal request")
  2616  		s.statelessPeers[peer.ID()] = struct{}{}
  2617  		s.lock.Unlock()
  2618  
  2619  		// Signal this request as failed, and ready for rescheduling
  2620  		s.scheduleRevertTrienodeHealRequest(req)
  2621  		return nil
  2622  	}
  2623  	s.lock.Unlock()
  2624  
  2625  	// Cross reference the requested trienodes with the response to find gaps
  2626  	// that the serving node is missing
  2627  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2628  	hash := make([]byte, 32)
  2629  
  2630  	nodes := make([][]byte, len(req.hashes))
  2631  	for i, j := 0, 0; i < len(trienodes); i++ {
  2632  		// Find the next hash that we've been served, leaving misses with nils
  2633  		hasher.Reset()
  2634  		hasher.Write(trienodes[i])
  2635  		hasher.Read(hash)
  2636  
  2637  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2638  			j++
  2639  		}
  2640  		if j < len(req.hashes) {
  2641  			nodes[j] = trienodes[i]
  2642  			j++
  2643  			continue
  2644  		}
  2645  		// We've either run out of hashes, or got unrequested data
  2646  		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
  2647  		// Signal this request as failed, and ready for rescheduling
  2648  		s.scheduleRevertTrienodeHealRequest(req)
  2649  		return errors.New("unexpected healing trienode")
  2650  	}
  2651  	// Response validated, send it to the scheduler for filling
  2652  	response := &trienodeHealResponse{
  2653  		task:   req.task,
  2654  		hashes: req.hashes,
  2655  		paths:  req.paths,
  2656  		nodes:  nodes,
  2657  	}
  2658  	select {
  2659  	case req.deliver <- response:
  2660  	case <-req.cancel:
  2661  	case <-req.stale:
  2662  	}
  2663  	return nil
  2664  }
  2665  
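// [Editor's sketch, not in the original file] OnTrieNodes and onHealByteCodes
// share the same response-matching technique: served blobs arrive in request
// order but may skip entries, so a two-pointer scan keys each blob by its
// keccak256 hash and aligns it with the requested hash list, leaving nil for
// anything the peer omitted. A hypothetical standalone helper:
func matchServedByHash(requested []common.Hash, served [][]byte) ([][]byte, error) {
	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
	hash := make([]byte, 32)

	matched := make([][]byte, len(requested))
	for i, j := 0, 0; i < len(served); i++ {
		hasher.Reset()
		hasher.Write(served[i])
		hasher.Read(hash)

		// Skip over requested hashes the peer chose not to serve
		for j < len(requested) && !bytes.Equal(hash, requested[j][:]) {
			j++
		}
		if j >= len(requested) {
			// Ran out of requested hashes: the response contains unrequested data
			return nil, errors.New("unrequested data in response")
		}
		matched[j] = served[i]
		j++
	}
	return matched, nil
}
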
  2666  // onHealByteCodes is a callback method to invoke when a batch of contract
  2667  // bytecodes is received from a remote peer in the healing phase.
  2668  func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2669  	var size common.StorageSize
  2670  	for _, code := range bytecodes {
  2671  		size += common.StorageSize(len(code))
  2672  	}
  2673  	logger := peer.Log().NewWith("reqid", id)
  2674  	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2675  
  2676  	// Whether or not the response is valid, we can mark the peer as idle and
  2677  	// notify the scheduler to assign a new task. If the response is invalid,
  2678  	// we'll drop the peer in a bit.
  2679  	s.lock.Lock()
  2680  	if _, ok := s.peers[peer.ID()]; ok {
  2681  		s.bytecodeHealIdlers[peer.ID()] = struct{}{}
  2682  	}
  2683  	select {
  2684  	case s.update <- struct{}{}:
  2685  	default:
  2686  	}
  2687  	// Ensure the response is for a valid request
  2688  	req, ok := s.bytecodeHealReqs[id]
  2689  	if !ok {
  2690  		// Request stale, perhaps the peer timed out but came through in the end
  2691  		logger.Warn("Unexpected bytecode heal packet")
  2692  		s.lock.Unlock()
  2693  		return nil
  2694  	}
  2695  	delete(s.bytecodeHealReqs, id)
  2696  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2697  
  2698  	// Clean up the request timeout timer; we'll decide how to proceed based
  2699  	// on the actual delivered content
  2700  	if !req.timeout.Stop() {
  2701  		// The timeout already triggered, and this request will be reverted+rescheduled
  2702  		s.lock.Unlock()
  2703  		return nil
  2704  	}
  2705  
  2706  	// Response is valid, but check if peer is signalling that it does not have
  2707  	// the requested data. For bytecode heal requests that means the peer is not
  2708  	// yet synced.
  2709  	if len(bytecodes) == 0 {
  2710  		logger.Debug("Peer rejected bytecode heal request")
  2711  		s.statelessPeers[peer.ID()] = struct{}{}
  2712  		s.lock.Unlock()
  2713  
  2714  		// Signal this request as failed, and ready for rescheduling
  2715  		s.scheduleRevertBytecodeHealRequest(req)
  2716  		return nil
  2717  	}
  2718  	s.lock.Unlock()
  2719  
  2720  	// Cross reference the requested bytecodes with the response to find gaps
  2721  	// that the serving node is missing
  2722  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2723  	hash := make([]byte, 32)
  2724  
  2725  	codes := make([][]byte, len(req.hashes))
  2726  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2727  		// Find the next hash that we've been served, leaving misses with nils
  2728  		hasher.Reset()
  2729  		hasher.Write(bytecodes[i])
  2730  		hasher.Read(hash)
  2731  
  2732  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2733  			j++
  2734  		}
  2735  		if j < len(req.hashes) {
  2736  			codes[j] = bytecodes[i]
  2737  			j++
  2738  			continue
  2739  		}
  2740  		// We've either run out of hashes, or got unrequested data
  2741  		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
  2742  		// Signal this request as failed, and ready for rescheduling
  2743  		s.scheduleRevertBytecodeHealRequest(req)
  2744  		return errors.New("unexpected healing bytecode")
  2745  	}
  2746  	// Response validated, send it to the scheduler for filling
  2747  	response := &bytecodeHealResponse{
  2748  		task:   req.task,
  2749  		hashes: req.hashes,
  2750  		codes:  codes,
  2751  	}
  2752  	select {
  2753  	case req.deliver <- response:
  2754  	case <-req.cancel:
  2755  	case <-req.stale:
  2756  	}
  2757  	return nil
  2758  }
  2759  
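// [Editor's note, not in the original file] Both heal handlers wake the main
// scheduler with a non-blocking send on s.update (a select with a default
// case): if a wakeup is already pending the new one is dropped, so the
// network path never blocks on the scheduler. Minimal sketch of that pattern:
//
//	select {
//	case update <- struct{}{}: // wake the scheduler if it is listening
//	default: // a wakeup is already queued; dropping this one is safe
//	}
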
  2760  // onHealState is a callback method to invoke when a flat state (account
  2761  // or storage slot) is downloaded during the healing stage. The flat states
  2762  // can be persisted blindly and can be fixed later in the generation stage.
  2763  // Note that this method is not concurrency safe; callers must synchronize access.
  2764  func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
  2765  	if len(paths) == 1 {
  2766  		s.stateWriter.WriteAccountSnapshot(common.BytesToHash(paths[0]), value)
  2767  		s.accountHealed += 1
  2768  		s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(value))
  2769  	}
  2770  	if len(paths) == 2 {
  2771  		s.stateWriter.WriteStorageSnapshot(common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
  2772  		s.storageHealed += 1
  2773  		s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
  2774  	}
  2775  	if s.stateWriter.ValueSize() > database.IdealBatchSize {
  2776  		s.stateWriter.Write() // It's fine to ignore the error here
  2777  		s.stateWriter.Reset()
  2778  	}
  2779  	return nil
  2780  }
  2781  
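// [Editor's note, not in the original file] The byte accounting in onHealState
// is assumed to mirror the snapshot key layout: an account entry is keyed by a
// 1-byte prefix plus one 32-byte hash (1 + HashLength), while a storage entry
// is keyed by a 1-byte prefix plus two 32-byte hashes for the account and the
// slot (1 + 2*HashLength), each followed by the raw value bytes.
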
  2782  // hashSpace is the total size of the 256 bit hash space for accounts.
  2783  var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
  2784  
  2785  // report calculates various status reports and provides them to the user.
  2786  func (s *Syncer) report(force bool) {
  2787  	if len(s.tasks) > 0 {
  2788  		s.reportSyncProgress(force)
  2789  		return
  2790  	}
  2791  	s.reportHealProgress(force)
  2792  }
  2793  
  2794  // reportSyncProgress calculates various status reports and provides them to the user.
  2795  func (s *Syncer) reportSyncProgress(force bool) {
  2796  	// Don't report on every event, just occasionally
  2797  	if !force && time.Since(s.logTime) < 8*time.Second {
  2798  		return
  2799  	}
  2800  	// Don't report anything until we have made meaningful progress
  2801  	synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
  2802  	if synced == 0 {
  2803  		return
  2804  	}
  2805  	accountGaps := new(big.Int)
  2806  	for _, task := range s.tasks {
  2807  		accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
  2808  	}
  2809  	accountFills := new(big.Int).Sub(hashSpace, accountGaps)
  2810  	if accountFills.BitLen() == 0 {
  2811  		return
  2812  	}
  2813  	s.logTime = time.Now()
  2814  	estBytes := float64(new(big.Int).Div(
  2815  		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
  2816  		accountFills,
  2817  	).Uint64())
  2818  	// Don't report anything until we have made meaningful progress
  2819  	if estBytes < 1.0 {
  2820  		return
  2821  	}
  2822  	elapsed := time.Since(s.startTime)
  2823  	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
  2824  
  2825  	// Create a mega progress report
  2826  	var (
  2827  		progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
  2828  		accounts = fmt.Sprintf("%v@%v", s.accountSynced, s.accountBytes.TerminalString())
  2829  		storage  = fmt.Sprintf("%v@%v", s.storageSynced, s.storageBytes.TerminalString())
  2830  		bytecode = fmt.Sprintf("%v@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString())
  2831  	)
  2832  	logger.Info("State sync in progress", "synced", progress, "state", synced,
  2833  		"accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
  2834  }
  2835  
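// [Editor's sketch, not in the original file] reportSyncProgress extrapolates
// linearly: if `synced` bytes cover `accountFills` of the 2^256 hash space,
// the total state size is estimated as synced * hashSpace / accountFills. A
// hypothetical standalone form of that estimate (the caller must guarantee
// accountFills is non-zero, as the BitLen check above does):
func estimateTotalBytes(synced common.StorageSize, accountFills *big.Int) float64 {
	est := new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace)
	est.Div(est, accountFills)
	// e.g. 1 GiB synced while covering half the hash space -> ~2 GiB estimated total
	return float64(est.Uint64())
}
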
  2836  // reportHealProgress calculates various status reports and provides them to the user.
  2837  func (s *Syncer) reportHealProgress(force bool) {
  2838  	// Don't report on every event, just occasionally
  2839  	if !force && time.Since(s.logTime) < 8*time.Second {
  2840  		return
  2841  	}
  2842  	s.logTime = time.Now()
  2843  
  2844  	// Create a mega progress report
  2845  	var (
  2846  		trienode = fmt.Sprintf("%v@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString())
  2847  		bytecode = fmt.Sprintf("%v@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString())
  2848  		accounts = fmt.Sprintf("%v@%v", s.accountHealed, s.accountHealedBytes.TerminalString())
  2849  		storage  = fmt.Sprintf("%v@%v", s.storageHealed, s.storageHealedBytes.TerminalString())
  2850  	)
  2851  	logger.Info("State heal in progress", "accounts", accounts, "slots", storage,
  2852  		"codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
  2853  }
  2854  
  2855  // estimateRemainingSlots tries to determine roughly how many slots are left in
  2856  // a contract's storage, based on the number of keys and the last hash. This method
  2857  // assumes that the hashes are lexicographically ordered and evenly distributed.
  2858  func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
  2859  	if last == (common.Hash{}) {
  2860  		return 0, errors.New("last hash empty")
  2861  	}
  2862  	space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
  2863  	space.Div(space, last.Big())
  2864  	if !space.IsUint64() {
  2865  		// Gigantic address space probably due to too few or malicious slots
  2866  		return 0, errors.New("too few slots for estimation")
  2867  	}
  2868  	return space.Uint64() - uint64(hashes), nil
  2869  }
  2870  
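// [Editor's worked example, not in the original file] For estimateRemainingSlots:
// if 100 slot hashes have been fetched and the last one sits a quarter of the
// way into the hash space, then space = MaxBig256 * 100 / last ≈ 400 estimated
// slots in total, so roughly 400 - 100 = 300 slots remain to be downloaded.
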
  2871  // capacitySort implements sort.Interface, allowing sorting by peer message
  2872  // throughput. Note that callers should use sort.Reverse to get the desired
  2873  // effect of highest capacity being at the front.
  2874  type capacitySort struct {
  2875  	ids  []string
  2876  	caps []int
  2877  }
  2878  
  2879  func (s *capacitySort) Len() int {
  2880  	return len(s.ids)
  2881  }
  2882  
  2883  func (s *capacitySort) Less(i, j int) bool {
  2884  	return s.caps[i] < s.caps[j]
  2885  }
  2886  
  2887  func (s *capacitySort) Swap(i, j int) {
  2888  	s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
  2889  	s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
  2890  }
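
// [Editor's usage sketch, not in the original file] As the comment on
// capacitySort prescribes, wrapping it in sort.Reverse puts the highest
// capacity peers at the front of both slices:
func sortByCapacityDesc(ids []string, caps []int) {
	sort.Sort(sort.Reverse(&capacitySort{ids: ids, caps: caps}))
}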