github.com/klaytn/klaytn@v1.12.1/node/cn/snap/sync.go

     1  // Modifications Copyright 2022 The klaytn Authors
     2  // Copyright 2020 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from eth/protocols/snap/sync.go (2022/06/29).
    19  // Modified and improved for the klaytn development.
    20  
    21  package snap
    22  
    23  import (
    24  	"bytes"
    25  	"encoding/json"
    26  	"errors"
    27  	"fmt"
    28  	"math/big"
    29  	"math/rand"
    30  	"sort"
    31  	"sync"
    32  	"time"
    33  
    34  	"github.com/klaytn/klaytn/blockchain/state"
    35  	"github.com/klaytn/klaytn/blockchain/types/account"
    36  	"github.com/klaytn/klaytn/common"
    37  	"github.com/klaytn/klaytn/common/math"
    38  	"github.com/klaytn/klaytn/crypto"
    39  	"github.com/klaytn/klaytn/event"
    40  	"github.com/klaytn/klaytn/log"
    41  	"github.com/klaytn/klaytn/networks/p2p/msgrate"
    42  	"github.com/klaytn/klaytn/rlp"
    43  	"github.com/klaytn/klaytn/storage/database"
    44  	"github.com/klaytn/klaytn/storage/statedb"
    45  	"golang.org/x/crypto/sha3"
    46  )
    47  
    48  var logger = log.NewModuleLogger(log.SnapshotSync)
    49  
    50  var (
    51  	// emptyRoot is the known root hash of an empty trie.
    52  	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
    53  
    54  	// emptyCode is the known hash of the empty EVM bytecode.
    55  	emptyCode = crypto.Keccak256Hash(nil)
    56  )
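
// needsFetch is a minimal illustrative sketch (hypothetical, not used by the
// syncer itself) of how the sentinel hashes above are typically consulted:
// accounts whose storage root equals emptyRoot need no storage retrieval, and
// accounts whose code hash equals emptyCode need no bytecode retrieval.
func needsFetch(storageRoot, codeHash common.Hash) (needState, needCode bool) {
	return storageRoot != emptyRoot, codeHash != emptyCode
}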
    57  
    58  const (
    59  	// minRequestSize is the minimum number of bytes to request from a remote peer.
    60  	// This number is used as the low cap for account and storage range requests.
    61  	// Bytecode and trienode are limited inherently by item count (1).
    62  	minRequestSize = 64 * 1024
    63  
    64  	// maxRequestSize is the maximum number of bytes to request from a remote peer.
    65  	// This number is used as the high cap for account and storage range requests.
    66  	// Bytecode and trienode are limited more explicitly by the caps below.
    67  	maxRequestSize = 512 * 1024
    68  
    69  	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
    70  	// single query. If this number is too low, we're not filling responses fully
    71  	// and waste round trip times. If it's too high, we're capping responses and
    72  	// waste bandwidth.
    73  	//
    74  // Deployed bytecodes are currently capped at 24KB, so the minimum request
    75  	// size should be maxRequestSize / 24K. Assuming that most contracts do not
    76  	// come close to that, requesting 4x should be a good approximation.
    77  	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4
    78  
    79  	// maxTrieRequestCount is the maximum number of trie node blobs to request in
    80  	// a single query. If this number is too low, we're not filling responses fully
    81  	// and waste round trip times. If it's too high, we're capping responses and
    82  	// waste bandwidth.
    83  	maxTrieRequestCount = maxRequestSize / 512
    84  )
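
// With the caps above, maxCodeRequestCount works out to 512KiB / 24KiB * 4 =
// 21 * 4 = 84 bytecodes per query (integer division), and maxTrieRequestCount
// to 512KiB / 512B = 1024 trie nodes per query. A quick sanity check of that
// arithmetic:
//
//	fmt.Println(maxCodeRequestCount) // 84
//	fmt.Println(maxTrieRequestCount) // 1024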
    85  
    86  var (
    87  	// accountConcurrency is the number of chunks to split the account trie into
    88  	// to allow concurrent retrievals.
    89  	accountConcurrency = 16
    90  
    91  // storageConcurrency is the number of chunks to split a large contract
    92  	// storage trie into to allow concurrent retrievals.
    93  	storageConcurrency = 16
    94  )
    95  
    96  // ErrCancelled is returned from snap syncing if the operation was prematurely
    97  // terminated.
    98  var ErrCancelled = errors.New("sync cancelled")
    99  
   100  // accountRequest tracks a pending account range request to ensure responses are
   101  // to actual requests and to validate any security constraints.
   102  //
   103  // Concurrency note: account requests and responses are handled concurrently from
   104  // the main runloop to allow Merkle proof verifications on the peer's thread and
   105  // to drop on invalid response. The request struct must contain all the data to
   106  // construct the response without accessing runloop internals (i.e. task). That
   107  // is only included to allow the runloop to match a response to the task being
   108  // synced without having yet another set of maps.
   109  type accountRequest struct {
   110  	peer string    // Peer to which this request is assigned
   111  	id   uint64    // Request ID of this request
   112  	time time.Time // Timestamp when the request was sent
   113  
   114  	deliver chan *accountResponse // Channel to deliver successful response on
   115  	revert  chan *accountRequest  // Channel to deliver request failure on
   116  	cancel  chan struct{}         // Channel to track sync cancellation
   117  	timeout *time.Timer           // Timer to track delivery timeout
   118  	stale   chan struct{}         // Channel to signal the request was dropped
   119  
   120  	origin common.Hash // First account requested to allow continuation checks
   121  	limit  common.Hash // Last account requested to allow non-overlapping chunking
   122  
   123  	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
   124  }
   125  
   126  // accountResponse is an already Merkle-verified remote response to an account
   127  // range request. It contains the subtrie for the requested account range and
   128  // the database that's going to be filled with the internal nodes on commit.
   129  type accountResponse struct {
   130  	task *accountTask // Task which this request is filling
   131  
   132  	hashes   []common.Hash     // Account hashes in the returned range
   133  	accounts []account.Account // Expanded accounts in the returned range
   134  
   135  	cont bool // Whether the account range has a continuation
   136  }
   137  
   138  // bytecodeRequest tracks a pending bytecode request to ensure responses are to
   139  // actual requests and to validate any security constraints.
   140  //
   141  // Concurrency note: bytecode requests and responses are handled concurrently from
   142  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   143  // to drop on invalid response. The request struct must contain all the data to
   144  // construct the response without accessing runloop internals (i.e. task). That
   145  // is only included to allow the runloop to match a response to the task being
   146  // synced without having yet another set of maps.
   147  type bytecodeRequest struct {
   148  	peer string    // Peer to which this request is assigned
   149  	id   uint64    // Request ID of this request
   150  	time time.Time // Timestamp when the request was sent
   151  
   152  	deliver chan *bytecodeResponse // Channel to deliver successful response on
   153  	revert  chan *bytecodeRequest  // Channel to deliver request failure on
   154  	cancel  chan struct{}          // Channel to track sync cancellation
   155  	timeout *time.Timer            // Timer to track delivery timeout
   156  	stale   chan struct{}          // Channel to signal the request was dropped
   157  
   158  	hashes []common.Hash // Bytecode hashes to validate responses
   159  	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
   160  }
   161  
   162  // bytecodeResponse is an already verified remote response to a bytecode request.
   163  type bytecodeResponse struct {
   164  	task *accountTask // Task which this request is filling
   165  
   166  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   167  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   168  }
   169  
   170  // storageRequest tracks a pending storage ranges request to ensure responses are
   171  // to actual requests and to validate any security constraints.
   172  //
   173  // Concurrency note: storage requests and responses are handled concurrently from
   174  // the main runloop to allow Merkle proof verifications on the peer's thread and
   175  // to drop on invalid response. The request struct must contain all the data to
   176  // construct the response without accessing runloop internals (i.e. tasks). That
   177  // is only included to allow the runloop to match a response to the task being
   178  // synced without having yet another set of maps.
   179  type storageRequest struct {
   180  	peer string    // Peer to which this request is assigned
   181  	id   uint64    // Request ID of this request
   182  	time time.Time // Timestamp when the request was sent
   183  
   184  	deliver chan *storageResponse // Channel to deliver successful response on
   185  	revert  chan *storageRequest  // Channel to deliver request failure on
   186  	cancel  chan struct{}         // Channel to track sync cancellation
   187  	timeout *time.Timer           // Timer to track delivery timeout
   188  	stale   chan struct{}         // Channel to signal the request was dropped
   189  
   190  	accounts []common.Hash // Account hashes to validate responses
   191  	roots    []common.Hash // Storage roots to validate responses
   192  
   193  	origin common.Hash // First storage slot requested to allow continuation checks
   194  	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking
   195  
   196  	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
   197  	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
   198  }
   199  
   200  // storageResponse is an already Merkle-verified remote response to a storage
   201  // range request. It contains the subtries for the requested storage ranges and
   202  // the databases that are going to be filled with the internal nodes on commit.
   203  type storageResponse struct {
   204  	mainTask *accountTask // Task which this response belongs to
   205  	subTask  *storageTask // Task which this response is filling
   206  
   207  	accounts []common.Hash // Account hashes requested, may be only partially filled
   208  	roots    []common.Hash // Storage roots requested, may be only partially filled
   209  
   210  	hashes [][]common.Hash // Storage slot hashes in the returned range
   211  	slots  [][][]byte      // Storage slot values in the returned range
   212  
   213  	cont bool // Whether the last storage range has a continuation
   214  }
   215  
   216  // trienodeHealRequest tracks a pending state trie request to ensure responses
   217  // are to actual requests and to validate any security constraints.
   218  //
   219  // Concurrency note: trie node requests and responses are handled concurrently from
   220  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   221  // to drop on invalid response. The request struct must contain all the data to
   222  // construct the response without accessing runloop internals (i.e. task). That
   223  // is only included to allow the runloop to match a response to the task being
   224  // synced without having yet another set of maps.
   225  type trienodeHealRequest struct {
   226  	peer string    // Peer to which this request is assigned
   227  	id   uint64    // Request ID of this request
   228  	time time.Time // Timestamp when the request was sent
   229  
   230  	deliver chan *trienodeHealResponse // Channel to deliver successful response on
   231  	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
   232  	cancel  chan struct{}              // Channel to track sync cancellation
   233  	timeout *time.Timer                // Timer to track delivery timeout
   234  	stale   chan struct{}              // Channel to signal the request was dropped
   235  
   236  	hashes []common.Hash      // Trie node hashes to validate responses
   237  	paths  []statedb.SyncPath // Trie node paths requested for rescheduling
   238  
   239  	task *healTask // Task which this request is filling (only access fields through the runloop!!)
   240  }
   241  
   242  // trienodeHealResponse is an already verified remote response to a trie node request.
   243  type trienodeHealResponse struct {
   244  	task *healTask // Task which this request is filling
   245  
   246  	hashes []common.Hash      // Hashes of the trie nodes to avoid double hashing
   247  	paths  []statedb.SyncPath // Trie node paths requested for rescheduling missing ones
   248  	nodes  [][]byte           // Actual trie nodes to store into the database (nil = missing)
   249  }
   250  
   251  // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
   252  // actual requests and to validate any security constraints.
   253  //
   254  // Concurrency note: bytecode requests and responses are handled concurrently from
   255  // the main runloop to allow Keccak256 hash verifications on the peer's thread and
   256  // to drop on invalid response. The request struct must contain all the data to
   257  // construct the response without accessing runloop internals (i.e. task). That
   258  // is only included to allow the runloop to match a response to the task being
   259  // synced without having yet another set of maps.
   260  type bytecodeHealRequest struct {
   261  	peer string    // Peer to which this request is assigned
   262  	id   uint64    // Request ID of this request
   263  	time time.Time // Timestamp when the request was sent
   264  
   265  	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
   266  	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
   267  	cancel  chan struct{}              // Channel to track sync cancellation
   268  	timeout *time.Timer                // Timer to track delivery timeout
   269  	stale   chan struct{}              // Channel to signal the request was dropped
   270  
   271  	hashes []common.Hash // Bytecode hashes to validate responses
   272  	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
   273  }
   274  
   275  // bytecodeHealResponse is an already verified remote response to a bytecode request.
   276  type bytecodeHealResponse struct {
   277  	task *healTask // Task which this request is filling
   278  
   279  	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
   280  	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
   281  }
   282  
   283  // accountTask represents the sync task for a chunk of the account snapshot.
   284  type accountTask struct {
   285  	// These fields get serialized to leveldb on shutdown
   286  	Next     common.Hash                    // Next account to sync in this interval
   287  	Last     common.Hash                    // Last account to sync in this interval
   288  	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts
   289  
   290  	// These fields are internals used during runtime
   291  	req  *accountRequest  // Pending request to fill this task
   292  	res  *accountResponse // Validated response filling this task
   293  	pend int              // Number of pending subtasks for this round
   294  
   295  	needCode  []bool // Flags whether the filling accounts need code retrieval
   296  	needState []bool // Flags whether the filling accounts need storage retrieval
   297  	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing
   298  
   299  	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
   300  	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval
   301  
   302  	// TODO-Klaytn-Snapsync consider to use stack trie
   303  	genTrie *statedb.Trie // Node generator from storage slots
   304  	trieDb  *statedb.Database
   305  
   306  	done bool // Flag whether the task can be removed
   307  }
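
// Note that only the exported fields of accountTask (Next, Last and SubTasks)
// survive the JSON round trip done by saveSyncStatus/loadSyncStatus below; the
// lowercase runtime fields are rebuilt from scratch on resume. A minimal
// sketch of that behaviour (the field values are hypothetical):
//
//	task := &accountTask{Next: common.Hash{}, pend: 3}
//	blob, _ := json.Marshal(task)
//	// blob contains "Next", "Last" and "SubTasks" only; pend is dropped.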
   308  
   309  // storageTask represents the sync task for a chunk of the storage snapshot.
   310  type storageTask struct {
   311  	Next common.Hash // Next storage slot to sync in this interval
   312  	Last common.Hash // Last storage slot to sync in this interval
   313  
   314  	// These fields are internals used during runtime
   315  	root common.Hash     // Storage root hash for this instance
   316  	req  *storageRequest // Pending request to fill this task
   317  
   318  	// TODO-Klaytn-Snapsync consider to use stack trie
   319  	genTrie *statedb.Trie // Node generator from storage slots
   320  	trieDb  *statedb.Database
   321  
   322  	done bool // Flag whether the task can be removed
   323  }
   324  
   325  // healTask represents the sync task for healing the snap-synced chunk boundaries.
   326  type healTask struct {
   327  	scheduler *statedb.TrieSync // State trie sync scheduler defining the tasks
   328  
   329  	trieTasks map[common.Hash]statedb.SyncPath // Set of trie node tasks currently queued for retrieval
   330  	codeTasks map[common.Hash]struct{}         // Set of byte code tasks currently queued for retrieval
   331  }
   332  
   333  // SyncProgress is a database entry to allow suspending and resuming a snapshot state
   334  // sync. Unlike full and fast sync, there is no way to restart a suspended
   335  // snap sync without prior knowledge of the suspension point.
   336  type SyncProgress struct {
   337  	Tasks []*accountTask // The suspended account tasks (contract tasks within)
   338  
   339  	// Status report during syncing phase
   340  	AccountSynced  uint64             // Number of accounts downloaded
   341  	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   342  	BytecodeSynced uint64             // Number of bytecodes downloaded
   343  	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   344  	StorageSynced  uint64             // Number of storage slots downloaded
   345  	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   346  
   347  	// Status report during healing phase
   348  	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
   349  	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   350  	BytecodeHealSynced uint64             // Number of bytecodes downloaded
   351  	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   352  }
   353  
   354  // SyncPending is analogous to SyncProgress, but it's used to report on pending
   355  // ephemeral sync progress that doesn't get persisted into the database.
   356  type SyncPending struct {
   357  	TrienodeHeal uint64 // Number of state trie nodes pending
   358  	BytecodeHeal uint64 // Number of bytecodes pending
   359  }
   360  
   361  // SyncPeer abstracts out the methods required for a peer to be synced against
   362  // with the goal of allowing the construction of mock peers without the full
   363  // blown networking.
   364  type SyncPeer interface {
   365  	// ID retrieves the peer's unique identifier.
   366  	ID() string
   367  
   368  	// RequestAccountRange fetches a batch of accounts rooted in a specific account
   369  	// trie, starting with the origin.
   370  	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error
   371  
   372  	// RequestStorageRanges fetches a batch of storage slots belonging to one or
   373  	// more accounts. If slots from only one account are requested, an origin marker
   374  	// may also be used to retrieve from there.
   375  	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error
   376  
   377  	// RequestByteCodes fetches a batch of bytecodes by hash.
   378  	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error
   379  
   380  	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
   381  	// a specific state trie.
   382  	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error
   383  
   384  	// Log retrieves the peer's own contextual logger.
   385  	Log() log.Logger
   386  }
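
// nullPeer is a hypothetical, minimal SyncPeer implementation of the kind this
// interface is meant to enable for tests: it accepts every request and never
// delivers data. It is an illustrative sketch only, not used by the syncer.
type nullPeer struct{ id string }

func (p *nullPeer) ID() string { return p.id }

func (p *nullPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
	return nil // pretend the request was sent; a real peer would respond asynchronously
}

func (p *nullPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
	return nil
}

func (p *nullPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
	return nil
}

func (p *nullPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
	return nil
}

func (p *nullPeer) Log() log.Logger { return logger.NewWith("peer", p.id) }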
   387  
   388  // Syncer is a Klaytn account and storage trie syncer based on snapshots and
   389  // the snap protocol. Its purpose is to download all the accounts and storage
   390  // slots from remote peers and reassemble chunks of the state trie, on top of
   391  // which a state sync can be run to fix any gaps / overlaps.
   392  //
   393  // Every network request has a variety of failure events:
   394  //   - The peer disconnects after task assignment, failing to send the request
   395  //   - The peer disconnects after sending the request, before delivering on it
   396  //   - The peer remains connected, but does not deliver a response in time
   397  //   - The peer delivers a stale response after a previous timeout
   398  //   - The peer delivers a refusal to serve the requested state
   399  type Syncer struct {
   400  	db database.DBManager // Database to store the trie nodes into (and dedup)
   401  
   402  	root    common.Hash    // Current state trie root being synced
   403  	tasks   []*accountTask // Current account task set being synced
   404  	snapped bool           // Flag to signal that snap phase is done
   405  	healer  *healTask      // Current state healing task being executed
   406  	update  chan struct{}  // Notification channel for possible sync progression
   407  
   408  	peers    map[string]SyncPeer // Currently active peers to download from
   409  	peerJoin *event.Feed         // Event feed to react to peers joining
   410  	peerDrop *event.Feed         // Event feed to react to peers dropping
   411  	rates    *msgrate.Trackers   // Message throughput rates for peers
   412  
   413  	// Request tracking during syncing phase
   414  	statelessPeers map[string]struct{} // Peers that failed to deliver state data
   415  	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
   416  	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   417  	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests
   418  
   419  	accountReqs  map[uint64]*accountRequest  // Account requests currently running
   420  	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
   421  	storageReqs  map[uint64]*storageRequest  // Storage requests currently running
   422  
   423  	accountSynced  uint64             // Number of accounts downloaded
   424  	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
   425  	bytecodeSynced uint64             // Number of bytecodes downloaded
   426  	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
   427  	storageSynced  uint64             // Number of storage slots downloaded
   428  	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk
   429  
   430  	extProgress *SyncProgress // Progress that can be exposed to external callers
   431  
   432  	// Request tracking during healing phase
   433  	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
   434  	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests
   435  
   436  	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
   437  	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running
   438  
   439  	trienodeHealSynced uint64             // Number of state trie nodes downloaded
   440  	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
   441  	trienodeHealDups   uint64             // Number of state trie nodes already processed
   442  	trienodeHealNops   uint64             // Number of state trie nodes not requested
   443  	bytecodeHealSynced uint64             // Number of bytecodes downloaded
   444  	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
   445  	bytecodeHealDups   uint64             // Number of bytecodes already processed
   446  	bytecodeHealNops   uint64             // Number of bytecodes not requested
   447  
   448  	stateWriter        database.SnapshotDBBatch // Shared batch writer used for persisting raw states
   449  	accountHealed      uint64                   // Number of accounts downloaded during the healing stage
   450  	accountHealedBytes common.StorageSize       // Number of raw account bytes persisted to disk during the healing stage
   451  	storageHealed      uint64                   // Number of storage slots downloaded during the healing stage
   452  	storageHealedBytes common.StorageSize       // Number of raw storage bytes persisted to disk during the healing stage
   453  
   454  	startTime time.Time // Time instance when snapshot sync started
   455  	logTime   time.Time // Time instance when status was last reported
   456  
   457  	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
   458  	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
   459  }
   460  
   461  // NewSyncer creates a new snapshot syncer to download the Klaytn state over the
   462  // snap protocol.
   463  func NewSyncer(db database.DBManager) *Syncer {
   464  	return &Syncer{
   465  		db: db,
   466  
   467  		peers:    make(map[string]SyncPeer),
   468  		peerJoin: new(event.Feed),
   469  		peerDrop: new(event.Feed),
   470  		rates:    msgrate.NewTrackers(logger.NewWith("proto", "snap")),
   471  		update:   make(chan struct{}, 1),
   472  
   473  		accountIdlers:  make(map[string]struct{}),
   474  		storageIdlers:  make(map[string]struct{}),
   475  		bytecodeIdlers: make(map[string]struct{}),
   476  
   477  		accountReqs:  make(map[uint64]*accountRequest),
   478  		storageReqs:  make(map[uint64]*storageRequest),
   479  		bytecodeReqs: make(map[uint64]*bytecodeRequest),
   480  
   481  		trienodeHealIdlers: make(map[string]struct{}),
   482  		bytecodeHealIdlers: make(map[string]struct{}),
   483  
   484  		trienodeHealReqs: make(map[uint64]*trienodeHealRequest),
   485  		bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest),
   486  		stateWriter:      db.NewSnapshotDBBatch(),
   487  
   488  		extProgress: new(SyncProgress),
   489  	}
   490  }
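
// A minimal usage sketch (hypothetical wiring; dbm, peer and pivotRoot are
// assumed to be supplied by the caller): construct a syncer, register at least
// one peer and drive a sync cycle against a pivot state root.
//
//	s := NewSyncer(dbm) // dbm is a database.DBManager
//	_ = s.Register(peer)
//	cancel := make(chan struct{})
//	if err := s.Sync(pivotRoot, cancel); err != nil {
//		logger.Error("Snap sync failed", "err", err)
//	}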
   491  
   492  // Register injects a new data source into the syncer's peerset.
   493  func (s *Syncer) Register(peer SyncPeer) error {
   494  	// Make sure the peer is not registered yet
   495  	id := peer.ID()
   496  
   497  	s.lock.Lock()
   498  	if _, ok := s.peers[id]; ok {
   499  		logger.Error("Snap peer already registered", "id", id)
   500  
   501  		s.lock.Unlock()
   502  		return errors.New("already registered")
   503  	}
   504  	s.peers[id] = peer
   505  	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))
   506  
   507  	// Mark the peer as idle, even if no sync is running
   508  	s.accountIdlers[id] = struct{}{}
   509  	s.storageIdlers[id] = struct{}{}
   510  	s.bytecodeIdlers[id] = struct{}{}
   511  	s.trienodeHealIdlers[id] = struct{}{}
   512  	s.bytecodeHealIdlers[id] = struct{}{}
   513  	s.lock.Unlock()
   514  
   515  	// Notify any active syncs that a new peer can be assigned data
   516  	s.peerJoin.Send(id)
   517  	return nil
   518  }
   519  
   520  // Unregister removes a data source from the syncer's peerset.
   521  func (s *Syncer) Unregister(id string) error {
   522  	// Remove all traces of the peer from the registry
   523  	s.lock.Lock()
   524  	if _, ok := s.peers[id]; !ok {
   525  		logger.Error("Snap peer not registered", "id", id)
   526  
   527  		s.lock.Unlock()
   528  		return errors.New("not registered")
   529  	}
   530  	delete(s.peers, id)
   531  	s.rates.Untrack(id)
   532  
   533  	// Remove status markers, even if no sync is running
   534  	delete(s.statelessPeers, id)
   535  
   536  	delete(s.accountIdlers, id)
   537  	delete(s.storageIdlers, id)
   538  	delete(s.bytecodeIdlers, id)
   539  	delete(s.trienodeHealIdlers, id)
   540  	delete(s.bytecodeHealIdlers, id)
   541  	s.lock.Unlock()
   542  
   543  	// Notify any active syncs that pending requests need to be reverted
   544  	s.peerDrop.Send(id)
   545  	return nil
   546  }
   547  
   548  // Sync starts (or resumes a previous) sync cycle to iterate over a state trie
   549  // with the given root and reconstruct the nodes based on the snapshot leaves.
   550  // Previously downloaded segments will not be redownloaded or fixed; rather, any
   551  // errors will be healed after the leaves are fully accumulated.
   552  func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
   553  	// Move the trie root from any previous value, revert stateless markers for
   554  	// any peers and initialize the syncer if it was not yet run
   555  	s.lock.Lock()
   556  	s.root = root
   557  	s.healer = &healTask{
   558  		scheduler: state.NewStateSync(root, s.db, nil, nil, s.onHealState),
   559  		trieTasks: make(map[common.Hash]statedb.SyncPath),
   560  		codeTasks: make(map[common.Hash]struct{}),
   561  	}
   562  	s.statelessPeers = make(map[string]struct{})
   563  	s.lock.Unlock()
   564  
   565  	if s.startTime == (time.Time{}) {
   566  		s.startTime = time.Now()
   567  	}
   568  	// Retrieve the previous sync status from LevelDB and abort if already synced
   569  	s.loadSyncStatus()
   570  	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   571  		logger.Debug("Snapshot sync already completed")
   572  		return nil
   573  	}
   574  	defer func() { // Persist any progress, independent of failure
   575  		for _, task := range s.tasks {
   576  			s.forwardAccountTask(task)
   577  		}
   578  		s.cleanAccountTasks()
   579  		s.saveSyncStatus()
   580  	}()
   581  
   582  	logger.Debug("Starting snapshot sync cycle", "root", root)
   583  
   584  	// Flush out the last committed raw states
   585  	defer func() {
   586  		if s.stateWriter.ValueSize() > 0 {
   587  			s.stateWriter.Write()
   588  			s.stateWriter.Reset()
   589  		}
   590  		s.stateWriter.Release()
   591  	}()
   592  	defer s.report(true)
   593  
   594  	// Whether sync completed or not, disregard any future packets
   595  	defer func() {
   596  		logger.Debug("Terminating snapshot sync cycle", "root", root)
   597  		s.lock.Lock()
   598  		s.accountReqs = make(map[uint64]*accountRequest)
   599  		s.storageReqs = make(map[uint64]*storageRequest)
   600  		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
   601  		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
   602  		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
   603  		s.lock.Unlock()
   604  	}()
   605  	// Keep scheduling sync tasks
   606  	peerJoin := make(chan string, 16)
   607  	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
   608  	defer peerJoinSub.Unsubscribe()
   609  
   610  	peerDrop := make(chan string, 16)
   611  	peerDropSub := s.peerDrop.Subscribe(peerDrop)
   612  	defer peerDropSub.Unsubscribe()
   613  
   614  	// Create a set of unique channels for this sync cycle. We need these to be
   615  	// ephemeral so a data race doesn't accidentally deliver something stale on
   616  	// a persistent channel across syncs (yup, this happened)
   617  	var (
   618  		accountReqFails      = make(chan *accountRequest)
   619  		storageReqFails      = make(chan *storageRequest)
   620  		bytecodeReqFails     = make(chan *bytecodeRequest)
   621  		accountResps         = make(chan *accountResponse)
   622  		storageResps         = make(chan *storageResponse)
   623  		bytecodeResps        = make(chan *bytecodeResponse)
   624  		trienodeHealReqFails = make(chan *trienodeHealRequest)
   625  		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
   626  		trienodeHealResps    = make(chan *trienodeHealResponse)
   627  		bytecodeHealResps    = make(chan *bytecodeHealResponse)
   628  	)
   629  	for {
   630  		// Remove all completed tasks and terminate sync if everything's done
   631  		s.cleanStorageTasks()
   632  		s.cleanAccountTasks()
   633  		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
   634  			return nil
   635  		}
   636  		// Assign all the data retrieval tasks to any free peers
   637  		s.assignAccountTasks(accountResps, accountReqFails, cancel)
   638  		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
   639  		s.assignStorageTasks(storageResps, storageReqFails, cancel)
   640  
   641  		if len(s.tasks) == 0 {
   642  			// Sync phase done, run heal phase
   643  			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
   644  			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
   645  		}
   646  		// Update sync progress
   647  		s.lock.Lock()
   648  		s.extProgress = &SyncProgress{
   649  			AccountSynced:      s.accountSynced,
   650  			AccountBytes:       s.accountBytes,
   651  			BytecodeSynced:     s.bytecodeSynced,
   652  			BytecodeBytes:      s.bytecodeBytes,
   653  			StorageSynced:      s.storageSynced,
   654  			StorageBytes:       s.storageBytes,
   655  			TrienodeHealSynced: s.trienodeHealSynced,
   656  			TrienodeHealBytes:  s.trienodeHealBytes,
   657  			BytecodeHealSynced: s.bytecodeHealSynced,
   658  			BytecodeHealBytes:  s.bytecodeHealBytes,
   659  		}
   660  		s.lock.Unlock()
   661  		// Wait for something to happen
   662  		select {
   663  		case <-s.update:
   664  			// Something happened (new peer, delivery, timeout), recheck tasks
   665  		case <-peerJoin:
   666  			// A new peer joined, try to schedule it new tasks
   667  		case id := <-peerDrop:
   668  			s.revertRequests(id)
   669  		case <-cancel:
   670  			return ErrCancelled
   671  
   672  		case req := <-accountReqFails:
   673  			s.revertAccountRequest(req)
   674  		case req := <-bytecodeReqFails:
   675  			s.revertBytecodeRequest(req)
   676  		case req := <-storageReqFails:
   677  			s.revertStorageRequest(req)
   678  		case req := <-trienodeHealReqFails:
   679  			s.revertTrienodeHealRequest(req)
   680  		case req := <-bytecodeHealReqFails:
   681  			s.revertBytecodeHealRequest(req)
   682  
   683  		case res := <-accountResps:
   684  			s.processAccountResponse(res)
   685  		case res := <-bytecodeResps:
   686  			s.processBytecodeResponse(res)
   687  		case res := <-storageResps:
   688  			s.processStorageResponse(res)
   689  		case res := <-trienodeHealResps:
   690  			s.processTrienodeHealResponse(res)
   691  		case res := <-bytecodeHealResps:
   692  			s.processBytecodeHealResponse(res)
   693  		}
   694  		// Report stats if something meaningful happened
   695  		s.report(false)
   696  	}
   697  }
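
// Closing the cancel channel handed to Sync is the supported way to abort a
// running cycle: the select in the runloop above observes it and returns
// ErrCancelled. A sketch, reusing the hypothetical s and pivotRoot from the
// usage note on NewSyncer:
//
//	cancel := make(chan struct{})
//	errc := make(chan error, 1)
//	go func() { errc <- s.Sync(pivotRoot, cancel) }()
//	close(cancel)
//	if err := <-errc; errors.Is(err, ErrCancelled) {
//		// the cycle was aborted; progress was persisted by the deferred saves
//	}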
   698  
   699  // loadSyncStatus retrieves a previously aborted sync status from the database,
   700  // or generates a fresh one if none is available.
   701  func (s *Syncer) loadSyncStatus() {
   702  	var progress SyncProgress
   703  
   704  	if status := s.db.ReadSnapshotSyncStatus(); status != nil {
   705  		if err := json.Unmarshal(status, &progress); err != nil {
   706  			logger.Error("Failed to decode snap sync status", "err", err)
   707  		} else {
   708  			for _, task := range progress.Tasks {
   709  				logger.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
   710  			}
   711  			s.tasks = progress.Tasks
   712  			for _, task := range s.tasks {
   713  				task.trieDb = statedb.NewDatabase(s.db)
   714  			task.genTrie, _ = statedb.NewTrie(common.Hash{}, task.trieDb, nil)
   715  
   716  				for _, subtasks := range task.SubTasks {
   717  					for _, subtask := range subtasks {
   718  						subtask.trieDb = statedb.NewDatabase(s.db)
   719  						subtask.genTrie, _ = statedb.NewTrie(common.Hash{}, subtask.trieDb, nil)
   720  					}
   721  				}
   722  			}
   723  			s.lock.Lock()
   724  			defer s.lock.Unlock()
   725  
   726  			s.snapped = len(s.tasks) == 0
   727  
   728  			s.accountSynced = progress.AccountSynced
   729  			s.accountBytes = progress.AccountBytes
   730  			s.bytecodeSynced = progress.BytecodeSynced
   731  			s.bytecodeBytes = progress.BytecodeBytes
   732  			s.storageSynced = progress.StorageSynced
   733  			s.storageBytes = progress.StorageBytes
   734  
   735  			s.trienodeHealSynced = progress.TrienodeHealSynced
   736  			s.trienodeHealBytes = progress.TrienodeHealBytes
   737  			s.bytecodeHealSynced = progress.BytecodeHealSynced
   738  			s.bytecodeHealBytes = progress.BytecodeHealBytes
   739  			return
   740  		}
   741  	}
   742  	// Either we've failed to decode the previous state, or there was none.
   743  	// Start a fresh sync by chunking up the account range and scheduling
   744  	// them for retrieval.
   745  	s.tasks = nil
   746  	s.accountSynced, s.accountBytes = 0, 0
   747  	s.bytecodeSynced, s.bytecodeBytes = 0, 0
   748  	s.storageSynced, s.storageBytes = 0, 0
   749  	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
   750  	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0
   751  
   752  	var next common.Hash
   753  	step := new(big.Int).Sub(
   754  		new(big.Int).Div(
   755  			new(big.Int).Exp(common.Big2, common.Big256, nil),
   756  			big.NewInt(int64(accountConcurrency)),
   757  		), common.Big1,
   758  	)
   759  	for i := 0; i < accountConcurrency; i++ {
   760  		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
   761  		if i == accountConcurrency-1 {
   762  			// Make sure we don't overflow if the step is not a proper divisor
   763  			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
   764  		}
   765  		db := statedb.NewDatabase(s.db)
   766  		trie, _ := statedb.NewTrie(common.Hash{}, db, nil)
   767  		s.tasks = append(s.tasks, &accountTask{
   768  			Next:     next,
   769  			Last:     last,
   770  			SubTasks: make(map[common.Hash][]*storageTask),
   771  			genTrie:  trie,
   772  			trieDb:   db,
   773  		})
   774  		logger.Debug("Created account sync task", "from", next, "last", last)
   775  		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
   776  	}
   777  }
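
// For the default accountConcurrency of 16, the step above is 2^256/16 - 1, so
// the first task spans 0x00...00 through 0x0fff...ff, the second starts at
// 0x1000...00, and so on; the final task's Last is clamped to 0xffff...ff
// explicitly. The boundary arithmetic can be checked in isolation:
//
//	step := new(big.Int).Sub(
//		new(big.Int).Div(new(big.Int).Lsh(common.Big1, 256), big.NewInt(16)),
//		common.Big1,
//	)
//	fmt.Printf("%064x\n", step) // 0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff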
   778  
   779  // saveSyncStatus marshals the remaining sync tasks into leveldb.
   780  func (s *Syncer) saveSyncStatus() {
   781  	// Serialize any partial progress to disk before spinning down
   782  	for _, task := range s.tasks {
   783  		if err := task.trieDb.Commit(task.genTrie.Hash(), false, 0); err != nil {
   784  			logger.Error("Failed to persist account slots", "err", err)
   785  		}
   786  		for _, subtasks := range task.SubTasks {
   787  			for _, subtask := range subtasks {
   788  				if err := subtask.trieDb.Commit(subtask.genTrie.Hash(), false, 0); err != nil {
   789  					logger.Error("Failed to persist storage slots", "err", err)
   790  				}
   791  			}
   792  		}
   793  	}
   794  	// Store the actual progress markers
   795  	progress := &SyncProgress{
   796  		Tasks:              s.tasks,
   797  		AccountSynced:      s.accountSynced,
   798  		AccountBytes:       s.accountBytes,
   799  		BytecodeSynced:     s.bytecodeSynced,
   800  		BytecodeBytes:      s.bytecodeBytes,
   801  		StorageSynced:      s.storageSynced,
   802  		StorageBytes:       s.storageBytes,
   803  		TrienodeHealSynced: s.trienodeHealSynced,
   804  		TrienodeHealBytes:  s.trienodeHealBytes,
   805  		BytecodeHealSynced: s.bytecodeHealSynced,
   806  		BytecodeHealBytes:  s.bytecodeHealBytes,
   807  	}
   808  	status, err := json.Marshal(progress)
   809  	if err != nil {
   810  		panic(err) // This can only fail during implementation
   811  	}
   812  	s.db.WriteSnapshotSyncStatus(status)
   813  }
   814  
   815  // Progress returns the snap sync status statistics.
   816  func (s *Syncer) Progress() (*SyncProgress, *SyncPending) {
   817  	s.lock.Lock()
   818  	defer s.lock.Unlock()
   819  	pending := new(SyncPending)
   820  	if s.healer != nil {
   821  		pending.TrienodeHeal = uint64(len(s.healer.trieTasks))
   822  		pending.BytecodeHeal = uint64(len(s.healer.codeTasks))
   823  	}
   824  	return s.extProgress, pending
   825  }
   826  
   827  // cleanAccountTasks removes account range retrieval tasks that have already been
   828  // completed.
   829  func (s *Syncer) cleanAccountTasks() {
   830  	// If the sync was already done before, don't even bother
   831  	if len(s.tasks) == 0 {
   832  		return
   833  	}
   834  	// Sync wasn't finished previously, check for any task that can be finalized
   835  	for i := 0; i < len(s.tasks); i++ {
   836  		if s.tasks[i].done {
   837  			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
   838  			i--
   839  		}
   840  	}
   841  	// If everything was just finalized, generate the account trie and start healing
   842  	if len(s.tasks) == 0 {
   843  		s.lock.Lock()
   844  		s.snapped = true
   845  		s.lock.Unlock()
   846  
   847  		// Push the final sync report
   848  		s.reportSyncProgress(true)
   849  	}
   850  }
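
// The index-rewind deletion above is the usual Go idiom for compacting a slice
// in place while iterating. The same pattern in isolation, over plain ints:
//
//	nums := []int{1, 2, 3, 4}
//	for i := 0; i < len(nums); i++ {
//		if nums[i]%2 == 0 { // stand-in for the done flag
//			nums = append(nums[:i], nums[i+1:]...)
//			i--
//		}
//	}
//	// nums is now [1 3]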
   851  
   852  // cleanStorageTasks iterates over all the account tasks and storage sub-tasks
   853  // within, cleaning any that have been completed.
   854  func (s *Syncer) cleanStorageTasks() {
   855  	for _, task := range s.tasks {
   856  		for account, subtasks := range task.SubTasks {
   857  			// Remove storage range retrieval tasks that completed
   858  			for j := 0; j < len(subtasks); j++ {
   859  				if subtasks[j].done {
   860  					subtasks = append(subtasks[:j], subtasks[j+1:]...)
   861  					j--
   862  				}
   863  			}
   864  			if len(subtasks) > 0 {
   865  				task.SubTasks[account] = subtasks
   866  				continue
   867  			}
   868  			// If all storage chunks are done, mark the account as done too
   869  			for j, hash := range task.res.hashes {
   870  				if hash == account {
   871  					task.needState[j] = false
   872  				}
   873  			}
   874  			delete(task.SubTasks, account)
   875  			task.pend--
   876  
   877  			// If this was the last pending task, forward the account task
   878  			if task.pend == 0 {
   879  				s.forwardAccountTask(task)
   880  			}
   881  		}
   882  	}
   883  }
   884  
   885  // assignAccountTasks attempts to match idle peers to pending account range
   886  // retrievals.
   887  func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
   888  	s.lock.Lock()
   889  	defer s.lock.Unlock()
   890  
   891  	// Sort the peers by download capacity to use faster ones if many available
   892  	idlers := &capacitySort{
   893  		ids:  make([]string, 0, len(s.accountIdlers)),
   894  		caps: make([]int, 0, len(s.accountIdlers)),
   895  	}
   896  	targetTTL := s.rates.TargetTimeout()
   897  	for id := range s.accountIdlers {
   898  		if _, ok := s.statelessPeers[id]; ok {
   899  			continue
   900  		}
   901  		idlers.ids = append(idlers.ids, id)
   902  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
   903  	}
   904  	if len(idlers.ids) == 0 {
   905  		// TODO-Klaytn-SnapSync enhance logging if necessary
   906  		return
   907  	}
   908  	sort.Sort(sort.Reverse(idlers))
   909  
   910  	// Iterate over all the tasks and try to find a pending one
   911  	for _, task := range s.tasks {
   912  		// Skip any tasks already filling
   913  		if task.req != nil || task.res != nil {
   914  			continue
   915  		}
   916  		// Task pending retrieval, try to find an idle peer. If no such peer
   917  		// exists, we probably assigned tasks for all (or they are stateless).
   918  		// Abort the entire assignment mechanism.
   919  		if len(idlers.ids) == 0 {
   920  			return
   921  		}
   922  		var (
   923  			idle = idlers.ids[0]
   924  			peer = s.peers[idle]
   925  			cap  = idlers.caps[0]
   926  		)
   927  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
   928  
   929  		// Matched a pending task to an idle peer, allocate a unique request id
   930  		var reqid uint64
   931  		for {
   932  			reqid = uint64(rand.Int63())
   933  			if reqid == 0 {
   934  				continue
   935  			}
   936  			if _, ok := s.accountReqs[reqid]; ok {
   937  				continue
   938  			}
   939  			break
   940  		}
   941  		// Generate the network query and send it to the peer
   942  		req := &accountRequest{
   943  			peer:    idle,
   944  			id:      reqid,
   945  			time:    time.Now(),
   946  			deliver: success,
   947  			revert:  fail,
   948  			cancel:  cancel,
   949  			stale:   make(chan struct{}),
   950  			origin:  task.Next,
   951  			limit:   task.Last,
   952  			task:    task,
   953  		}
   954  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
   955  			peer.Log().Debug("Account range request timed out", "reqid", reqid)
   956  			s.rates.Update(idle, AccountRangeMsg, 0, 0)
   957  			s.scheduleRevertAccountRequest(req)
   958  		})
   959  		s.accountReqs[reqid] = req
   960  		delete(s.accountIdlers, idle)
   961  
   962  		s.pend.Add(1)
   963  		go func(root common.Hash) {
   964  			defer s.pend.Done()
   965  
   966  			// Attempt to send the remote request and revert if it fails
   967  			if cap > maxRequestSize {
   968  				cap = maxRequestSize
   969  			}
   970  			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
   971  				cap = minRequestSize
   972  			}
   973  			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
   974  				peer.Log().Debug("Failed to request account range", "err", err)
   975  				s.scheduleRevertAccountRequest(req)
   976  			}
   977  		}(s.root)
   978  
   979  		// Inject the request into the task to block further assignments
   980  		task.req = req
   981  	}
   982  }
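
// capacitySort, defined elsewhere in this package, pairs peer ids with their
// measured capacities so that sort.Reverse above puts the fastest peers first.
// A sketch of the sort.Interface shape this function assumes:
//
//	type capacitySort struct {
//		ids  []string
//		caps []int
//	}
//
//	func (s *capacitySort) Len() int           { return len(s.ids) }
//	func (s *capacitySort) Less(i, j int) bool { return s.caps[i] < s.caps[j] }
//	func (s *capacitySort) Swap(i, j int) {
//		s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
//		s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
//	}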
   983  
   984  // assignBytecodeTasks attempts to match idle peers to pending code retrievals.
   985  func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
   986  	s.lock.Lock()
   987  	defer s.lock.Unlock()
   988  
   989  	// Sort the peers by download capacity to use faster ones if many available
   990  	idlers := &capacitySort{
   991  		ids:  make([]string, 0, len(s.bytecodeIdlers)),
   992  		caps: make([]int, 0, len(s.bytecodeIdlers)),
   993  	}
   994  	targetTTL := s.rates.TargetTimeout()
   995  	for id := range s.bytecodeIdlers {
   996  		if _, ok := s.statelessPeers[id]; ok {
   997  			continue
   998  		}
   999  		idlers.ids = append(idlers.ids, id)
  1000  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1001  	}
  1002  	if len(idlers.ids) == 0 {
  1003  		return
  1004  	}
  1005  	sort.Sort(sort.Reverse(idlers))
  1006  
  1007  	// Iterate over all the tasks and try to find a pending one
  1008  	for _, task := range s.tasks {
  1009  		// Skip any tasks not in the bytecode retrieval phase
  1010  		if task.res == nil {
  1011  			continue
  1012  		}
  1013  		// Skip tasks that are already retrieving (or done with) all codes
  1014  		if len(task.codeTasks) == 0 {
  1015  			continue
  1016  		}
  1017  		// Task pending retrieval, try to find an idle peer. If no such peer
  1018  		// exists, we probably assigned tasks for all (or they are stateless).
  1019  		// Abort the entire assignment mechanism.
  1020  		if len(idlers.ids) == 0 {
  1021  			return
  1022  		}
  1023  		var (
  1024  			idle = idlers.ids[0]
  1025  			peer = s.peers[idle]
  1026  			cap  = idlers.caps[0]
  1027  		)
  1028  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1029  
  1030  		// Matched a pending task to an idle peer, allocate a unique request id
  1031  		var reqid uint64
  1032  		for {
  1033  			reqid = uint64(rand.Int63())
  1034  			if reqid == 0 {
  1035  				continue
  1036  			}
  1037  			if _, ok := s.bytecodeReqs[reqid]; ok {
  1038  				continue
  1039  			}
  1040  			break
  1041  		}
  1042  		// Generate the network query and send it to the peer
  1043  		if cap > maxCodeRequestCount {
  1044  			cap = maxCodeRequestCount
  1045  		}
  1046  		hashes := make([]common.Hash, 0, cap)
  1047  		for hash := range task.codeTasks {
  1048  			delete(task.codeTasks, hash)
  1049  			hashes = append(hashes, hash)
  1050  			if len(hashes) >= cap {
  1051  				break
  1052  			}
  1053  		}
  1054  		req := &bytecodeRequest{
  1055  			peer:    idle,
  1056  			id:      reqid,
  1057  			time:    time.Now(),
  1058  			deliver: success,
  1059  			revert:  fail,
  1060  			cancel:  cancel,
  1061  			stale:   make(chan struct{}),
  1062  			hashes:  hashes,
  1063  			task:    task,
  1064  		}
  1065  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1066  			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
  1067  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1068  			s.scheduleRevertBytecodeRequest(req)
  1069  		})
  1070  		s.bytecodeReqs[reqid] = req
  1071  		delete(s.bytecodeIdlers, idle)
  1072  
  1073  		s.pend.Add(1)
  1074  		go func() {
  1075  			defer s.pend.Done()
  1076  
  1077  			// Attempt to send the remote request and revert if it fails
  1078  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1079  				logger.Debug("Failed to request bytecodes", "err", err)
  1080  				s.scheduleRevertBytecodeRequest(req)
  1081  			}
  1082  		}()
  1083  	}
  1084  }
  1085  
  1086  // assignStorageTasks attempts to match idle peers to pending storage range
  1087  // retrievals.
  1088  func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
  1089  	s.lock.Lock()
  1090  	defer s.lock.Unlock()
  1091  
  1092  	// Sort the peers by download capacity to use faster ones if many available
  1093  	idlers := &capacitySort{
  1094  		ids:  make([]string, 0, len(s.storageIdlers)),
  1095  		caps: make([]int, 0, len(s.storageIdlers)),
  1096  	}
  1097  	targetTTL := s.rates.TargetTimeout()
  1098  	for id := range s.storageIdlers {
  1099  		if _, ok := s.statelessPeers[id]; ok {
  1100  			continue
  1101  		}
  1102  		idlers.ids = append(idlers.ids, id)
  1103  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
  1104  	}
  1105  	if len(idlers.ids) == 0 {
  1106  		return
  1107  	}
  1108  	sort.Sort(sort.Reverse(idlers))
  1109  
  1110  	// Iterate over all the tasks and try to find a pending one
  1111  	for _, task := range s.tasks {
  1112  		// Skip any tasks not in the storage retrieval phase
  1113  		if task.res == nil {
  1114  			continue
  1115  		}
  1116  		// Skip tasks that are already retrieving (or done with) all small states
  1117  		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
  1118  			continue
  1119  		}
  1120  		// Task pending retrieval, try to find an idle peer. If no such peer
  1121  		// exists, we probably assigned tasks for all (or they are stateless).
  1122  		// Abort the entire assignment mechanism.
  1123  		if len(idlers.ids) == 0 {
  1124  			return
  1125  		}
  1126  		var (
  1127  			idle = idlers.ids[0]
  1128  			peer = s.peers[idle]
  1129  			cap  = idlers.caps[0]
  1130  		)
  1131  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1132  
  1133  		// Matched a pending task to an idle peer, allocate a unique request id
  1134  		var reqid uint64
  1135  		for {
  1136  			reqid = uint64(rand.Int63())
  1137  			if reqid == 0 {
  1138  				continue
  1139  			}
  1140  			if _, ok := s.storageReqs[reqid]; ok {
  1141  				continue
  1142  			}
  1143  			break
  1144  		}
  1145  		// Generate the network query and send it to the peer. If there are
  1146  		// large contract tasks pending, complete those before diving into
  1147  		// even more new contracts.
  1148  		if cap > maxRequestSize {
  1149  			cap = maxRequestSize
  1150  		}
  1151  		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
  1152  			cap = minRequestSize
  1153  		}
  1154  		storageSets := cap / 1024
  1155  
  1156  		var (
  1157  			accounts = make([]common.Hash, 0, storageSets)
  1158  			roots    = make([]common.Hash, 0, storageSets)
  1159  			subtask  *storageTask
  1160  		)
  1161  		for account, subtasks := range task.SubTasks {
  1162  			for _, st := range subtasks {
  1163  				// Skip any subtasks already filling
  1164  				if st.req != nil {
  1165  					continue
  1166  				}
  1167  				// Found an incomplete storage chunk, schedule it
  1168  				accounts = append(accounts, account)
  1169  				roots = append(roots, st.root)
  1170  				subtask = st
  1171  				break // Large contract chunks are downloaded individually
  1172  			}
  1173  			if subtask != nil {
  1174  				break // Large contract chunks are downloaded individually
  1175  			}
  1176  		}
  1177  		if subtask == nil {
  1178  			// No large contract required retrieval, but small ones are available
  1179  			for account, root := range task.stateTasks {
  1180  				delete(task.stateTasks, account)
  1181  
  1182  				accounts = append(accounts, account)
  1183  				roots = append(roots, root)
  1184  
  1185  				if len(accounts) >= storageSets {
  1186  					break
  1187  				}
  1188  			}
  1189  		}
  1190  		// If nothing was found, it means this task is actually already fully
  1191  		// retrieving, but large contracts are hard to detect. Skip to the next.
  1192  		if len(accounts) == 0 {
  1193  			continue
  1194  		}
  1195  		req := &storageRequest{
  1196  			peer:     idle,
  1197  			id:       reqid,
  1198  			time:     time.Now(),
  1199  			deliver:  success,
  1200  			revert:   fail,
  1201  			cancel:   cancel,
  1202  			stale:    make(chan struct{}),
  1203  			accounts: accounts,
  1204  			roots:    roots,
  1205  			mainTask: task,
  1206  			subTask:  subtask,
  1207  		}
  1208  		if subtask != nil {
  1209  			req.origin = subtask.Next
  1210  			req.limit = subtask.Last
  1211  		}
  1212  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1213  			peer.Log().Debug("Storage request timed out", "reqid", reqid)
  1214  			s.rates.Update(idle, StorageRangesMsg, 0, 0)
  1215  			s.scheduleRevertStorageRequest(req)
  1216  		})
  1217  		s.storageReqs[reqid] = req
  1218  		delete(s.storageIdlers, idle)
  1219  
  1220  		s.pend.Add(1)
  1221  		go func(root common.Hash) {
  1222  			defer s.pend.Done()
  1223  
  1224  			// Attempt to send the remote request and revert if it fails
  1225  			var origin, limit []byte
  1226  			if subtask != nil {
  1227  				origin, limit = req.origin[:], req.limit[:]
  1228  			}
  1229  			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
  1230  				logger.Debug("Failed to request storage", "err", err)
  1231  				s.scheduleRevertStorageRequest(req)
  1232  			}
  1233  		}(s.root)
  1234  
  1235  		// Inject the request into the subtask to block further assignments
  1236  		if subtask != nil {
  1237  			subtask.req = req
  1238  		}
  1239  	}
  1240  }
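
// The cap/1024 division above budgets roughly one KiB of response data per
// account, so a peer rated at maxRequestSize is asked for at most 512 small
// contract storage sets per query and a minRequestSize peer for 64. A quick
// check of that arithmetic:
//
//	fmt.Println(maxRequestSize / 1024) // 512
//	fmt.Println(minRequestSize / 1024) // 64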
  1241  
  1242  // assignTrienodeHealTasks attempts to match idle peers to trie node requests to
  1243  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1244  func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
  1245  	s.lock.Lock()
  1246  	defer s.lock.Unlock()
  1247  
  1248  	// Sort the peers by download capacity to use faster ones if many are available
  1249  	idlers := &capacitySort{
  1250  		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
  1251  		caps: make([]int, 0, len(s.trienodeHealIdlers)),
  1252  	}
  1253  	targetTTL := s.rates.TargetTimeout()
  1254  	for id := range s.trienodeHealIdlers {
  1255  		if _, ok := s.statelessPeers[id]; ok {
  1256  			continue
  1257  		}
  1258  		idlers.ids = append(idlers.ids, id)
  1259  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
  1260  	}
  1261  	if len(idlers.ids) == 0 {
  1262  		return
  1263  	}
  1264  	sort.Sort(sort.Reverse(idlers))
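        	// A minimal sketch of the ordering above, assuming capacitySort (defined
        	// elsewhere in this file) sorts the paired slices ascending by capacity,
        	// which sort.Reverse turns into fastest-first:
        	//
        	//	idlers := &capacitySort{ids: []string{"a", "b"}, caps: []int{100, 900}}
        	//	sort.Sort(sort.Reverse(idlers))
        	//	// idlers.ids == ["b", "a"], idlers.caps == [900, 100]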
  1265  
  1266  	// Iterate over pending tasks and try to find a peer to retrieve with
  1267  	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1268  		// If there are not enough trie tasks queued to fully assign, fill the
  1269  		// queue from the state sync scheduler. The trie syncer schedules these
  1270  		// together with bytecodes, so we need to queue them combined.
  1271  		var (
  1272  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1273  			want = maxTrieRequestCount + maxCodeRequestCount
  1274  		)
  1275  		if have < want {
  1276  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1277  			for i, hash := range nodes {
  1278  				s.healer.trieTasks[hash] = paths[i]
  1279  			}
  1280  			for _, hash := range codes {
  1281  				s.healer.codeTasks[hash] = struct{}{}
  1282  			}
  1283  		}
  1284  		// If all the heal tasks are bytecodes or already downloading, bail
  1285  		if len(s.healer.trieTasks) == 0 {
  1286  			return
  1287  		}
  1288  		// Task pending retrieval, try to find an idle peer. If no such peer
  1289  		// exists, we probably assigned tasks for all (or they are stateless).
  1290  		// Abort the entire assignment mechanism.
  1291  		if len(idlers.ids) == 0 {
  1292  			return
  1293  		}
  1294  		var (
  1295  			idle = idlers.ids[0]
  1296  			peer = s.peers[idle]
  1297  			cap  = idlers.caps[0]
  1298  		)
  1299  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1300  
  1301  		// Matched a pending task to an idle peer, allocate a unique request id
  1302  		var reqid uint64
  1303  		for {
  1304  			reqid = uint64(rand.Int63())
  1305  			if reqid == 0 {
  1306  				continue
  1307  			}
  1308  			if _, ok := s.trienodeHealReqs[reqid]; ok {
  1309  				continue
  1310  			}
  1311  			break
  1312  		}
  1313  		// Generate the network query and send it to the peer
  1314  		if cap > maxTrieRequestCount {
  1315  			cap = maxTrieRequestCount
  1316  		}
  1317  		var (
  1318  			hashes   = make([]common.Hash, 0, cap)
  1319  			paths    = make([]statedb.SyncPath, 0, cap)
  1320  			pathsets = make([]TrieNodePathSet, 0, cap)
  1321  		)
  1322  		for hash, pathset := range s.healer.trieTasks {
  1323  			delete(s.healer.trieTasks, hash)
  1324  
  1325  			hashes = append(hashes, hash)
  1326  			paths = append(paths, pathset)
  1327  			pathsets = append(pathsets, [][]byte(pathset)) // TODO-Klaytn-SnapSync group requests by account hash
  1328  
  1329  			if len(hashes) >= cap {
  1330  				break
  1331  			}
  1332  		}
  1333  		req := &trienodeHealRequest{
  1334  			peer:    idle,
  1335  			id:      reqid,
  1336  			time:    time.Now(),
  1337  			deliver: success,
  1338  			revert:  fail,
  1339  			cancel:  cancel,
  1340  			stale:   make(chan struct{}),
  1341  			hashes:  hashes,
  1342  			paths:   paths,
  1343  			task:    s.healer,
  1344  		}
  1345  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1346  			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
  1347  			s.rates.Update(idle, TrieNodesMsg, 0, 0)
  1348  			s.scheduleRevertTrienodeHealRequest(req)
  1349  		})
  1350  		s.trienodeHealReqs[reqid] = req
  1351  		delete(s.trienodeHealIdlers, idle)
  1352  
  1353  		s.pend.Add(1)
  1354  		go func(root common.Hash) {
  1355  			defer s.pend.Done()
  1356  
  1357  			// Attempt to send the remote request and revert if it fails
  1358  			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
  1359  				logger.Debug("Failed to request trienode healers", "err", err)
  1360  				s.scheduleRevertTrienodeHealRequest(req)
  1361  			}
  1362  		}(s.root)
  1363  	}
  1364  }
  1365  
  1366  // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
  1367  // heal any trie errors caused by the snap sync's chunked retrieval model.
  1368  func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
  1369  	s.lock.Lock()
  1370  	defer s.lock.Unlock()
  1371  
  1372  	// Sort the peers by download capacity to use faster ones if many are available
  1373  	idlers := &capacitySort{
  1374  		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
  1375  		caps: make([]int, 0, len(s.bytecodeHealIdlers)),
  1376  	}
  1377  	targetTTL := s.rates.TargetTimeout()
  1378  	for id := range s.bytecodeHealIdlers {
  1379  		if _, ok := s.statelessPeers[id]; ok {
  1380  			continue
  1381  		}
  1382  		idlers.ids = append(idlers.ids, id)
  1383  		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
  1384  	}
  1385  	if len(idlers.ids) == 0 {
  1386  		return
  1387  	}
  1388  	sort.Sort(sort.Reverse(idlers))
  1389  
  1390  	// Iterate over pending tasks and try to find a peer to retrieve with
  1391  	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
  1392  		// If there are not enough trie tasks queued to fully assign, fill the
  1393  		// queue from the state sync scheduler. The trie syncer schedules these
  1394  		// together with trie nodes, so we need to queue them combined.
  1395  		var (
  1396  			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
  1397  			want = maxTrieRequestCount + maxCodeRequestCount
  1398  		)
  1399  		if have < want {
  1400  			nodes, paths, codes := s.healer.scheduler.Missing(want - have)
  1401  			for i, hash := range nodes {
  1402  				s.healer.trieTasks[hash] = paths[i]
  1403  			}
  1404  			for _, hash := range codes {
  1405  				s.healer.codeTasks[hash] = struct{}{}
  1406  			}
  1407  		}
  1408  		// If all the heal tasks are trienodes or already downloading, bail
  1409  		if len(s.healer.codeTasks) == 0 {
  1410  			return
  1411  		}
  1412  		// Task pending retrieval, try to find an idle peer. If no such peer
  1413  		// exists, we probably assigned tasks for all (or they are stateless).
  1414  		// Abort the entire assignment mechanism.
  1415  		if len(idlers.ids) == 0 {
  1416  			return
  1417  		}
  1418  		var (
  1419  			idle = idlers.ids[0]
  1420  			peer = s.peers[idle]
  1421  			cap  = idlers.caps[0]
  1422  		)
  1423  		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]
  1424  
  1425  		// Matched a pending task to an idle peer, allocate a unique request id
  1426  		var reqid uint64
  1427  		for {
  1428  			reqid = uint64(rand.Int63())
  1429  			if reqid == 0 {
  1430  				continue
  1431  			}
  1432  			if _, ok := s.bytecodeHealReqs[reqid]; ok {
  1433  				continue
  1434  			}
  1435  			break
  1436  		}
  1437  		// Generate the network query and send it to the peer
  1438  		if cap > maxCodeRequestCount {
  1439  			cap = maxCodeRequestCount
  1440  		}
  1441  		hashes := make([]common.Hash, 0, cap)
  1442  		for hash := range s.healer.codeTasks {
  1443  			delete(s.healer.codeTasks, hash)
  1444  
  1445  			hashes = append(hashes, hash)
  1446  			if len(hashes) >= cap {
  1447  				break
  1448  			}
  1449  		}
  1450  		req := &bytecodeHealRequest{
  1451  			peer:    idle,
  1452  			id:      reqid,
  1453  			time:    time.Now(),
  1454  			deliver: success,
  1455  			revert:  fail,
  1456  			cancel:  cancel,
  1457  			stale:   make(chan struct{}),
  1458  			hashes:  hashes,
  1459  			task:    s.healer,
  1460  		}
  1461  		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
  1462  			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
  1463  			s.rates.Update(idle, ByteCodesMsg, 0, 0)
  1464  			s.scheduleRevertBytecodeHealRequest(req)
  1465  		})
  1466  		s.bytecodeHealReqs[reqid] = req
  1467  		delete(s.bytecodeHealIdlers, idle)
  1468  
  1469  		s.pend.Add(1)
  1470  		go func() {
  1471  			defer s.pend.Done()
  1472  
  1473  			// Attempt to send the remote request and revert if it fails
  1474  			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
  1475  				logger.Debug("Failed to request bytecode healers", "err", err)
  1476  				s.scheduleRevertBytecodeHealRequest(req)
  1477  			}
  1478  		}()
  1479  	}
  1480  }
  1481  
  1482  // revertRequests locates all the currently pending requests from a particular
  1483  // peer and reverts them, rescheduling for others to fulfill.
  1484  func (s *Syncer) revertRequests(peer string) {
  1485  	// Gather the requests first, as reverting them needs the lock too
  1486  	s.lock.Lock()
  1487  	var accountReqs []*accountRequest
  1488  	for _, req := range s.accountReqs {
  1489  		if req.peer == peer {
  1490  			accountReqs = append(accountReqs, req)
  1491  		}
  1492  	}
  1493  	var bytecodeReqs []*bytecodeRequest
  1494  	for _, req := range s.bytecodeReqs {
  1495  		if req.peer == peer {
  1496  			bytecodeReqs = append(bytecodeReqs, req)
  1497  		}
  1498  	}
  1499  	var storageReqs []*storageRequest
  1500  	for _, req := range s.storageReqs {
  1501  		if req.peer == peer {
  1502  			storageReqs = append(storageReqs, req)
  1503  		}
  1504  	}
  1505  	var trienodeHealReqs []*trienodeHealRequest
  1506  	for _, req := range s.trienodeHealReqs {
  1507  		if req.peer == peer {
  1508  			trienodeHealReqs = append(trienodeHealReqs, req)
  1509  		}
  1510  	}
  1511  	var bytecodeHealReqs []*bytecodeHealRequest
  1512  	for _, req := range s.bytecodeHealReqs {
  1513  		if req.peer == peer {
  1514  			bytecodeHealReqs = append(bytecodeHealReqs, req)
  1515  		}
  1516  	}
  1517  	s.lock.Unlock()
  1518  
  1519  	// Revert all the requests matching the peer
  1520  	for _, req := range accountReqs {
  1521  		s.revertAccountRequest(req)
  1522  	}
  1523  	for _, req := range bytecodeReqs {
  1524  		s.revertBytecodeRequest(req)
  1525  	}
  1526  	for _, req := range storageReqs {
  1527  		s.revertStorageRequest(req)
  1528  	}
  1529  	for _, req := range trienodeHealReqs {
  1530  		s.revertTrienodeHealRequest(req)
  1531  	}
  1532  	for _, req := range bytecodeHealReqs {
  1533  		s.revertBytecodeHealRequest(req)
  1534  	}
  1535  }
  1536  
  1537  // scheduleRevertAccountRequest asks the event loop to clean up an account range
  1538  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1539  func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
  1540  	select {
  1541  	case req.revert <- req:
  1542  		// Sync event loop notified
  1543  	case <-req.cancel:
  1544  		// Sync cycle got cancelled
  1545  	case <-req.stale:
  1546  		// Request already reverted
  1547  	}
  1548  }
  1549  
  1550  // revertAccountRequest cleans up an account range request and returns all failed
  1551  // retrieval tasks to the scheduler for reassignment.
  1552  //
  1553  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1554  // On peer threads, use scheduleRevertAccountRequest.
  1555  func (s *Syncer) revertAccountRequest(req *accountRequest) {
  1556  	logger.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
  1557  	select {
  1558  	case <-req.stale:
  1559  		logger.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
  1560  		return
  1561  	default:
  1562  	}
  1563  	close(req.stale)
  1564  
  1565  	// Remove the request from the tracked set
  1566  	s.lock.Lock()
  1567  	delete(s.accountReqs, req.id)
  1568  	s.lock.Unlock()
  1569  
  1570  	// If there's a timeout timer still running, abort it and mark the account
  1571  	// task as not-pending, ready for rescheduling
  1572  	req.timeout.Stop()
  1573  	if req.task.req == req {
  1574  		req.task.req = nil
  1575  	}
  1576  }
  1577  
  1578  // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
  1579  // and return all failed retrieval tasks to the scheduler for reassignment.
  1580  func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) {
  1581  	select {
  1582  	case req.revert <- req:
  1583  		// Sync event loop notified
  1584  	case <-req.cancel:
  1585  		// Sync cycle got cancelled
  1586  	case <-req.stale:
  1587  		// Request already reverted
  1588  	}
  1589  }
  1590  
  1591  // revertBytecodeRequest cleans up a bytecode request and returns all failed
  1592  // retrieval tasks to the scheduler for reassignment.
  1593  //
  1594  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1595  // On peer threads, use scheduleRevertBytecodeRequest.
  1596  func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) {
  1597  	logger.Debug("Reverting bytecode request", "peer", req.peer)
  1598  	select {
  1599  	case <-req.stale:
  1600  		logger.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id)
  1601  		return
  1602  	default:
  1603  	}
  1604  	close(req.stale)
  1605  
  1606  	// Remove the request from the tracked set
  1607  	s.lock.Lock()
  1608  	delete(s.bytecodeReqs, req.id)
  1609  	s.lock.Unlock()
  1610  
  1611  	// If there's a timeout timer still running, abort it and mark the code
  1612  	// retrievals as not-pending, ready for rescheduling
  1613  	req.timeout.Stop()
  1614  	for _, hash := range req.hashes {
  1615  		req.task.codeTasks[hash] = struct{}{}
  1616  	}
  1617  }
  1618  
  1619  // scheduleRevertStorageRequest asks the event loop to clean up a storage range
  1620  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1621  func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) {
  1622  	select {
  1623  	case req.revert <- req:
  1624  		// Sync event loop notified
  1625  	case <-req.cancel:
  1626  		// Sync cycle got cancelled
  1627  	case <-req.stale:
  1628  		// Request already reverted
  1629  	}
  1630  }
  1631  
  1632  // revertStorageRequest cleans up a storage range request and returns all failed
  1633  // retrieval tasks to the scheduler for reassignment.
  1634  //
  1635  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1636  // On peer threads, use scheduleRevertStorageRequest.
  1637  func (s *Syncer) revertStorageRequest(req *storageRequest) {
  1638  	logger.Debug("Reverting storage request", "peer", req.peer)
  1639  	select {
  1640  	case <-req.stale:
  1641  		logger.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id)
  1642  		return
  1643  	default:
  1644  	}
  1645  	close(req.stale)
  1646  
  1647  	// Remove the request from the tracked set
  1648  	s.lock.Lock()
  1649  	delete(s.storageReqs, req.id)
  1650  	s.lock.Unlock()
  1651  
  1652  	// If there's a timeout timer still running, abort it and mark the storage
  1653  	// task as not-pending, ready for rescheduling
  1654  	req.timeout.Stop()
  1655  	if req.subTask != nil {
  1656  		req.subTask.req = nil
  1657  	} else {
  1658  		for i, account := range req.accounts {
  1659  			req.mainTask.stateTasks[account] = req.roots[i]
  1660  		}
  1661  	}
  1662  }
  1663  
  1664  // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal
  1665  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1666  func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) {
  1667  	select {
  1668  	case req.revert <- req:
  1669  		// Sync event loop notified
  1670  	case <-req.cancel:
  1671  		// Sync cycle got cancelled
  1672  	case <-req.stale:
  1673  		// Request already reverted
  1674  	}
  1675  }
  1676  
  1677  // revertTrienodeHealRequest cleans up a trienode heal request and returns all
  1678  // failed retrieval tasks to the scheduler for reassignment.
  1679  //
  1680  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1681  // On peer threads, use scheduleRevertTrienodeHealRequest.
  1682  func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
  1683  	logger.Debug("Reverting trienode heal request", "peer", req.peer)
  1684  	select {
  1685  	case <-req.stale:
  1686  		logger.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1687  		return
  1688  	default:
  1689  	}
  1690  	close(req.stale)
  1691  
  1692  	// Remove the request from the tracked set
  1693  	s.lock.Lock()
  1694  	delete(s.trienodeHealReqs, req.id)
  1695  	s.lock.Unlock()
  1696  
  1697  	// If there's a timeout timer still running, abort it and mark the trie node
  1698  	// retrievals as not-pending, ready for rescheduling
  1699  	req.timeout.Stop()
  1700  	for i, hash := range req.hashes {
  1701  		req.task.trieTasks[hash] = req.paths[i]
  1702  	}
  1703  }
  1704  
  1705  // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
  1706  // request and return all failed retrieval tasks to the scheduler for reassignment.
  1707  func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
  1708  	select {
  1709  	case req.revert <- req:
  1710  		// Sync event loop notified
  1711  	case <-req.cancel:
  1712  		// Sync cycle got cancelled
  1713  	case <-req.stale:
  1714  		// Request already reverted
  1715  	}
  1716  }
  1717  
  1718  // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
  1719  // failed retrieval tasks to the scheduler for reassignment.
  1720  //
  1721  // Note, this needs to run on the event runloop thread to reschedule to idle peers.
  1722  // On peer threads, use scheduleRevertBytecodeHealRequest.
  1723  func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
  1724  	logger.Debug("Reverting bytecode heal request", "peer", req.peer)
  1725  	select {
  1726  	case <-req.stale:
  1727  		logger.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
  1728  		return
  1729  	default:
  1730  	}
  1731  	close(req.stale)
  1732  
  1733  	// Remove the request from the tracked set
  1734  	s.lock.Lock()
  1735  	delete(s.bytecodeHealReqs, req.id)
  1736  	s.lock.Unlock()
  1737  
  1738  	// If there's a timeout timer still running, abort it and mark the code
  1739  	// retrievals as not-pending, ready for rescheduling
  1740  	req.timeout.Stop()
  1741  	for _, hash := range req.hashes {
  1742  		req.task.codeTasks[hash] = struct{}{}
  1743  	}
  1744  }
  1745  
  1746  // processAccountResponse integrates an already validated account range response
  1747  // into the account tasks.
  1748  func (s *Syncer) processAccountResponse(res *accountResponse) {
  1749  	// Switch the task from pending to filling
  1750  	res.task.req = nil
  1751  	res.task.res = res
  1752  
  1753  	// Ensure that the response doesn't overflow into the subsequent task
  1754  	last := res.task.Last.Big()
  1755  	for i, hash := range res.hashes {
  1756  		// Mark the range complete if the last hash is already included.
  1757  		// Keep iterating to delete any extra states beyond it.
  1758  		cmp := hash.Big().Cmp(last)
  1759  		if cmp == 0 {
  1760  			res.cont = false
  1761  			continue
  1762  		}
  1763  		if cmp > 0 {
  1764  			// Chunk overflown, cut off excess
  1765  			res.hashes = res.hashes[:i]
  1766  			res.accounts = res.accounts[:i]
  1767  			res.cont = false // Mark range completed
  1768  			break
  1769  		}
  1770  	}
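        	// For illustration, with hypothetical values: if task.Last = 0x20... and
        	// the delivered hashes are [0x10..., 0x20..., 0x30...], the equal hash
        	// clears res.cont and the 0x30... entry overflows, so the response is
        	// cut back to the first two accounts.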
  1771  	// Iterate over all the accounts and assemble which ones need further sub-
  1772  	// filling before the entire account range can be persisted.
  1773  	res.task.needCode = make([]bool, len(res.accounts))
  1774  	res.task.needState = make([]bool, len(res.accounts))
  1775  	res.task.needHeal = make([]bool, len(res.accounts))
  1776  
  1777  	res.task.codeTasks = make(map[common.Hash]struct{})
  1778  	res.task.stateTasks = make(map[common.Hash]common.Hash)
  1779  
  1780  	resumed := make(map[common.Hash]struct{})
  1781  
  1782  	res.task.pend = 0
  1783  	for i, acc := range res.accounts {
  1784  		pacc := account.GetProgramAccount(acc)
  1785  		// Check if the account is a contract with an unknown code
  1786  		if pacc != nil && !bytes.Equal(pacc.GetCodeHash(), emptyCode[:]) {
  1787  			if !s.db.HasCodeWithPrefix(common.BytesToHash(pacc.GetCodeHash())) {
  1788  				res.task.codeTasks[common.BytesToHash(pacc.GetCodeHash())] = struct{}{}
  1789  				res.task.needCode[i] = true
  1790  				res.task.pend++
  1791  			}
  1792  		}
  1793  		// Check if the account is a contract with an unknown storage trie
  1794  		if pacc != nil && pacc.GetStorageRoot().Unextend() != emptyRoot {
  1795  			if ok, err := s.db.HasTrieNode(pacc.GetStorageRoot()); err != nil || !ok {
  1796  				// If there was a previous large state retrieval in progress,
  1797  				// don't restart it from scratch. This happens if a sync cycle
  1798  				// is interrupted and resumed later. However, *do* update the
  1799  				// previous root hash.
  1800  				if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
  1801  					logger.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", pacc.GetStorageRoot())
  1802  					for _, subtask := range subtasks {
  1803  						subtask.root = pacc.GetStorageRoot().Unextend()
  1804  					}
  1805  					res.task.needHeal[i] = true
  1806  					resumed[res.hashes[i]] = struct{}{}
  1807  				} else {
  1808  					res.task.stateTasks[res.hashes[i]] = pacc.GetStorageRoot().Unextend()
  1809  				}
  1810  				res.task.needState[i] = true
  1811  				res.task.pend++
  1812  			}
  1813  		}
  1814  	}
  1815  	// Delete any subtasks that have been aborted but not resumed. This may undo
  1816  	// some progress if a new peer gives us fewer accounts than an old one, but for
  1817  	// now we have to live with that.
  1818  	for hash := range res.task.SubTasks {
  1819  		if _, ok := resumed[hash]; !ok {
  1820  			logger.Debug("Aborting suspended storage retrieval", "account", hash)
  1821  			delete(res.task.SubTasks, hash)
  1822  		}
  1823  	}
  1824  	// If the account range contained no contracts, or all have been fully filled
  1825  	// beforehand, short circuit storage filling and forward to the next task
  1826  	if res.task.pend == 0 {
  1827  		s.forwardAccountTask(res.task)
  1828  		return
  1829  	}
  1830  	// Some accounts are incomplete, leave as is for the storage and contract
  1831  	// task assigners to pick up and fill.
  1832  }
  1833  
  1834  // processBytecodeResponse integrates an already validated bytecode response
  1835  // into the account tasks.
  1836  func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
  1837  	batch := s.db.NewBatch(database.StateTrieDB)
  1838  	defer batch.Release()
  1839  
  1840  	var codes uint64
  1841  	for i, hash := range res.hashes {
  1842  		code := res.codes[i]
  1843  
  1844  		// If the bytecode was not delivered, reschedule it
  1845  		if code == nil {
  1846  			res.task.codeTasks[hash] = struct{}{}
  1847  			continue
  1848  		}
  1849  		// Code was delivered, mark it not needed any more
  1850  		for j, acc := range res.task.res.accounts {
  1851  			pacc := account.GetProgramAccount(acc)
  1852  			if pacc != nil && res.task.needCode[j] && hash == common.BytesToHash(pacc.GetCodeHash()) {
  1853  				res.task.needCode[j] = false
  1854  				res.task.pend--
  1855  			}
  1856  		}
  1857  		// Push the bytecode into a database batch
  1858  		codes++
  1859  		s.db.PutCodeToBatch(batch, hash, code)
  1860  	}
  1861  	bytes := common.StorageSize(batch.ValueSize())
  1862  	if err := batch.Write(); err != nil {
  1863  		logger.Crit("Failed to persist bytecodes", "err", err)
  1864  	}
  1865  	s.bytecodeSynced += codes
  1866  	s.bytecodeBytes += bytes
  1867  
  1868  	logger.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
  1869  
  1870  	// If this delivery completed the last pending task, forward the account task
  1871  	// to the next chunk
  1872  	if res.task.pend == 0 {
  1873  		s.forwardAccountTask(res.task)
  1874  		return
  1875  	}
  1876  	// Some accounts are still incomplete, leave as is for the storage and contract
  1877  	// task assigners to pick up and fill.
  1878  }
  1879  
  1880  // processStorageResponse integrates an already validated storage response
  1881  // into the account tasks.
  1882  func (s *Syncer) processStorageResponse(res *storageResponse) {
  1883  	// Switch the subtask from pending to idle
  1884  	if res.subTask != nil {
  1885  		res.subTask.req = nil
  1886  	}
  1887  	batch := s.db.NewSnapshotDBBatch()
  1888  	defer batch.Release()
  1889  	var (
  1890  		slots           int
  1891  		oldStorageBytes = s.storageBytes
  1892  	)
  1893  	// Iterate over all the accounts and reconstruct their storage tries from the
  1894  	// delivered slots
  1895  	for i, accountHash := range res.accounts {
  1896  		// If the account was not delivered, reschedule it
  1897  		if i >= len(res.hashes) {
  1898  			res.mainTask.stateTasks[accountHash] = res.roots[i]
  1899  			continue
  1900  		}
  1901  		// State was delivered; if complete, mark it as not needed any more,
  1902  		// otherwise mark the account as needing healing
  1903  		for j, hash := range res.mainTask.res.hashes {
  1904  			if accountHash != hash {
  1905  				continue
  1906  			}
  1907  			pacc := account.GetProgramAccount(res.mainTask.res.accounts[j])
  1908  			if pacc == nil {
  1909  				continue
  1910  			}
  1911  
  1912  			// If the packet contains multiple contract storage slots, all
  1913  			// but the last are surely complete. The last contract may be
  1914  			// chunked, so check its continuation flag.
  1915  			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
  1916  				res.mainTask.needState[j] = false
  1917  				res.mainTask.pend--
  1918  			}
  1919  			// If the last contract was chunked, mark it as needing healing
  1920  			// to avoid writing it out to disk prematurely.
  1921  			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
  1922  				res.mainTask.needHeal[j] = true
  1923  			}
  1924  			// If the last contract was chunked, we need to switch to large
  1925  			// contract handling mode
  1926  			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
  1927  				// If we haven't yet started a large-contract retrieval, create
  1928  				// the subtasks for it within the main account task
  1929  				if tasks, ok := res.mainTask.SubTasks[accountHash]; !ok {
  1930  					var (
  1931  						keys    = res.hashes[i]
  1932  						chunks  = uint64(storageConcurrency)
  1933  						lastKey common.Hash
  1934  					)
  1935  					if len(keys) > 0 {
  1936  						lastKey = keys[len(keys)-1]
  1937  					}
  1938  					// If the number of slots remaining is low, decrease the
  1939  					// number of chunks. Somewhere on the order of 10-15K slots
  1940  					// fit into a packet of 500KB. A key/slot pair is maximum 64
  1941  					// bytes, so pessimistically maxRequestSize/64 = 8K.
  1942  					//
  1943  					// Chunk so that at least 2 packets are needed to fill a task.
  1944  					if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil {
  1945  						if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks {
  1946  							chunks = n + 1
  1947  						}
  1948  						logger.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks)
  1949  					} else {
  1950  						logger.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks)
  1951  					}
  1952  					r := newHashRange(lastKey, chunks)
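        					// A worked instance of the arithmetic above, with hypothetical
        					// numbers: one packet pessimistically holds maxRequestSize/64 =
        					// 8192 slots, so an estimate of 50000 remaining slots gives
        					// n = 50000/(2*8192) = 3 and hence 4 chunks (when below
        					// storageConcurrency); newHashRange then splits the keyspace
        					// above lastKey into that many equal sections.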
  1953  
  1954  					// Our first task is the one that was just filled by this response.
  1955  					db := statedb.NewDatabase(s.db)
  1956  					trie, _ := statedb.NewTrie(common.Hash{}, db, nil)
  1957  					tasks = append(tasks, &storageTask{
  1958  						Next:    common.Hash{},
  1959  						Last:    r.End(),
  1960  						root:    pacc.GetStorageRoot().Unextend(),
  1961  						genTrie: trie,
  1962  						trieDb:  db,
  1963  					})
  1964  					for r.Next() {
  1965  						db := statedb.NewDatabase(s.db)
  1966  						trie, _ := statedb.NewTrie(common.Hash{}, db, nil)
  1967  						tasks = append(tasks, &storageTask{
  1968  							Next:    r.Start(),
  1969  							Last:    r.End(),
  1970  							root:    pacc.GetStorageRoot().Unextend(),
  1971  							genTrie: trie,
  1972  							trieDb:  db,
  1973  						})
  1974  					}
  1975  					for _, task := range tasks {
  1976  						logger.Debug("Created storage sync task", "account", accountHash, "root", pacc.GetStorageRoot(), "from", task.Next, "last", task.Last)
  1977  					}
  1978  					res.mainTask.SubTasks[accountHash] = tasks
  1979  
  1980  					// Since we've just created the sub-tasks, this response
  1981  					// is surely for the first one (zero origin)
  1982  					res.subTask = tasks[0]
  1983  				}
  1984  			}
  1985  			// If we're in large contract delivery mode, forward the subtask
  1986  			if res.subTask != nil {
  1987  				// Ensure the response doesn't overflow into the subsequent task
  1988  				last := res.subTask.Last.Big()
  1989  				// Find the first overflowing key. While at it, mark res as complete
  1990  				// if the range includes or passes the 'last' key
  1991  				index := sort.Search(len(res.hashes[i]), func(k int) bool {
  1992  					cmp := res.hashes[i][k].Big().Cmp(last)
  1993  					if cmp >= 0 {
  1994  						res.cont = false
  1995  					}
  1996  					return cmp > 0
  1997  				})
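        				// For illustration, with hypothetical values: if 'last' = 0x20...
        				// and res.hashes[i] = [0x10..., 0x20..., 0x30...], a probe with
        				// cmp >= 0 clears res.cont and index lands on 2, the first key
        				// strictly greater than 'last'. sort.Search returns
        				// len(res.hashes[i]) when nothing overflows, so the slicing
        				// below is always in range.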
  1998  				if index >= 0 {
  1999  					// cut off excess
  2000  					res.hashes[i] = res.hashes[i][:index]
  2001  					res.slots[i] = res.slots[i][:index]
  2002  				}
  2003  				// Forward the relevant storage chunk (even if created just now)
  2004  				if res.cont {
  2005  					res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1])
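        					// incHash (defined elsewhere in this package) bumps the hash
        					// by one, e.g. incHash(0x...00ff) = 0x...0100, so the next
        					// request resumes just past the last delivered key.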
  2006  				} else {
  2007  					res.subTask.done = true
  2008  				}
  2009  			}
  2010  		}
  2011  		// Iterate over all the complete contracts, reconstruct the trie nodes and
  2012  		// push them to disk. If the contract is chunked, the trie nodes will be
  2013  		// reconstructed later.
  2014  		slots += len(res.hashes[i])
  2015  
  2016  		if i < len(res.hashes)-1 || res.subTask == nil {
  2017  			db := statedb.NewDatabase(s.db)
  2018  			tr, _ := statedb.NewTrie(common.Hash{}, db, nil)
  2019  			for j := 0; j < len(res.hashes[i]); j++ {
  2020  				tr.Update(res.hashes[i][j][:], res.slots[i][j])
  2021  			}
  2022  			root, _ := tr.Commit(nil)
  2023  			_, nodeSize, _ := db.Size()
  2024  			if err := db.Commit(root, false, 0); err != nil {
  2025  				logger.Error("Failed to persist storage slots", "err", err)
  2026  			} else {
  2027  				s.storageBytes += nodeSize
  2028  			}
  2029  		}
  2030  		// Persist the received storage segments. This flat state may be
  2031  		// outdated during the sync, but it can be fixed later during the
  2032  		// snapshot generation.
  2033  		for j := 0; j < len(res.hashes[i]); j++ {
  2034  			batch.WriteStorageSnapshot(accountHash, res.hashes[i][j], res.slots[i][j])
  2035  			s.storageBytes += common.StorageSize(len(database.StorageSnapshotKey(accountHash, res.hashes[i][j])) + len(res.slots[i][j]))
  2036  
  2037  			// If we're storing large contracts, generate the trie nodes
  2038  			// on the fly so as not to trash the gluing points between chunks
  2039  			if i == len(res.hashes)-1 && res.subTask != nil {
  2040  				res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
  2041  			}
  2042  		}
  2043  	}
  2044  	// Large contracts could have generated new trie nodes, flush them to disk
  2045  	if res.subTask != nil {
  2046  		if res.subTask.done {
  2047  			root, _ := res.subTask.genTrie.Commit(nil)
  2048  			_, nodeSize, _ := res.subTask.trieDb.Size()
  2049  
  2050  			if err := res.subTask.trieDb.Commit(root, false, 0); err != nil {
  2051  				logger.Error("Failed to persist stack slots", "root", root, "err", err)
  2052  			} else if root == res.subTask.root {
  2053  				s.storageBytes += nodeSize
  2054  				// The overflown delivery was nonetheless complete (the roots match), so clear the heal request
  2055  				for i, account := range res.mainTask.res.hashes {
  2056  					if account == res.accounts[len(res.accounts)-1] {
  2057  						res.mainTask.needHeal[i] = false
  2058  					}
  2059  				}
  2060  			}
  2061  		}
  2062  	}
  2063  	// Flush anything written just now and update the stats
  2064  	if err := batch.Write(); err != nil {
  2065  		logger.Crit("Failed to persist storage slots", "err", err)
  2066  	}
  2067  	s.storageSynced += uint64(slots)
  2068  
  2069  	logger.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes)
  2070  
  2071  	// If this delivery completed the last pending task, forward the account task
  2072  	// to the next chunk
  2073  	if res.mainTask.pend == 0 {
  2074  		s.forwardAccountTask(res.mainTask)
  2075  		return
  2076  	}
  2077  	// Some accounts are still incomplete, leave as is for the storage and contract
  2078  	// task assigners to pick up and fill.
  2079  }
  2080  
  2081  // processTrienodeHealResponse integrates an already validated trienode response
  2082  // into the healer tasks.
  2083  func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
  2084  	for i, hash := range res.hashes {
  2085  		node := res.nodes[i]
  2086  
  2087  		// If the trie node was not delivered, reschedule it
  2088  		if node == nil {
  2089  			res.task.trieTasks[hash] = res.paths[i]
  2090  			continue
  2091  		}
  2092  		// Push the trie node into the state syncer
  2093  		s.trienodeHealSynced++
  2094  		s.trienodeHealBytes += common.StorageSize(len(node))
  2095  
  2096  		err := s.healer.scheduler.Process(statedb.SyncResult{Hash: hash, Data: node})
  2097  		switch err {
  2098  		case nil:
  2099  		case statedb.ErrAlreadyProcessed:
  2100  			s.trienodeHealDups++
  2101  		case statedb.ErrNotRequested:
  2102  			s.trienodeHealNops++
  2103  		default:
  2104  			logger.Error("Invalid trienode processed", "hash", hash, "err", err)
  2105  		}
  2106  	}
  2107  	batch := s.db.NewBatch(database.StateTrieDB)
  2108  	defer batch.Release()
  2109  	if _, err := s.healer.scheduler.Commit(batch); err != nil {
  2110  		logger.Error("Failed to commit healing data", "err", err)
  2111  	}
  2112  	if err := batch.Write(); err != nil {
  2113  		logger.Crit("Failed to persist healing data", "err", err)
  2114  	}
  2115  	logger.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
  2116  }
  2117  
  2118  // processBytecodeHealResponse integrates an already validated bytecode response
  2119  // into the healer tasks.
  2120  func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
  2121  	for i, hash := range res.hashes {
  2122  		node := res.codes[i]
  2123  
  2124  		// If the trie node was not delivered, reschedule it
  2125  		// If the bytecode was not delivered, reschedule it
  2126  			res.task.codeTasks[hash] = struct{}{}
  2127  			continue
  2128  		}
  2129  		// Push the bytecode into the state syncer
  2130  		s.bytecodeHealSynced++
  2131  		s.bytecodeHealBytes += common.StorageSize(len(node))
  2132  
  2133  		err := s.healer.scheduler.Process(statedb.SyncResult{Hash: hash, Data: node})
  2134  		switch err {
  2135  		case nil:
  2136  		case statedb.ErrAlreadyProcessed:
  2137  			s.bytecodeHealDups++
  2138  		case statedb.ErrNotRequested:
  2139  			s.bytecodeHealNops++
  2140  		default:
  2141  			logger.Error("Invalid bytecode processed", "hash", hash, "err", err)
  2142  		}
  2143  	}
  2144  	batch := s.db.NewBatch(database.StateTrieDB)
  2145  	defer batch.Release()
  2146  	if _, err := s.healer.scheduler.Commit(batch); err != nil {
  2147  		logger.Error("Failed to commit healing data", "err", err)
  2148  	}
  2149  	if err := batch.Write(); err != nil {
  2150  		logger.Crit("Failed to persist healing data", "err", err)
  2151  	}
  2152  	logger.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
  2153  }
  2154  
  2155  // forwardAccountTask takes a filled account task and persists anything available
  2156  // into the database, after which it forwards the next account marker so that the
  2157  // task's next chunk may be filled.
  2158  func (s *Syncer) forwardAccountTask(task *accountTask) {
  2159  	// Remove any pending delivery
  2160  	res := task.res
  2161  	if res == nil {
  2162  		return // nothing to forward
  2163  	}
  2164  	task.res = nil
  2165  
  2166  	// Persist the received account segments. This flat state may be
  2167  	// outdated during the sync, but it can be fixed later during the
  2168  	// snapshot generation.
  2169  	oldAccountBytes := s.accountBytes
  2170  
  2171  	batch := s.db.NewSnapshotDBBatch()
  2172  	defer batch.Release()
  2173  	for i, hash := range res.hashes {
  2174  		if task.needCode[i] || task.needState[i] {
  2175  			break
  2176  		}
  2177  		serializer := account.NewAccountSerializerWithAccount(res.accounts[i])
  2178  		bytes, err := rlp.EncodeToBytes(serializer)
  2179  		if err != nil {
  2180  			logger.Error("Failed to encode account", "err", err)
  2181  		}
  2182  		batch.WriteAccountSnapshot(hash, bytes)
  2183  		s.accountBytes += common.StorageSize(len(database.AccountSnapshotKey(hash)) + len(bytes))
  2184  
  2185  		// If the task is complete, drop it into the trie to generate
  2186  		// account trie nodes for it
  2187  		if !task.needHeal[i] {
  2188  			task.genTrie.Update(hash[:], bytes)
  2189  		}
  2190  	}
  2191  	// Flush anything written just now and update the stats
  2192  	if err := batch.Write(); err != nil {
  2193  		logger.Crit("Failed to persist accounts", "err", err)
  2194  	}
  2195  	s.accountSynced += uint64(len(res.accounts))
  2196  
  2197  	// Task filling persisted, push the chunk marker forward to the first
  2198  	// account still missing data.
  2199  	for i, hash := range res.hashes {
  2200  		if task.needCode[i] || task.needState[i] {
  2201  			return
  2202  		}
  2203  		task.Next = incHash(hash)
  2204  	}
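        	// For illustration, with hypothetical values: if needCode/needState
        	// start as [false, false, true, ...], task.Next advances past the first
        	// two hashes and the function returns at the third, so the next account
        	// request resumes at the first account still missing data.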
  2205  	// All accounts marked as complete, track if the entire task is done
  2206  	task.done = !res.cont
  2207  
  2208  	// The trie could have generated trie nodes, so push them to disk. (We need
  2209  	// to flush after finalizing task.done. It's fine even if we crash and lose
  2210  	// this write, as it will only cause more data to be downloaded during healing.)
  2211  	if task.done {
  2212  		root, _ := task.genTrie.Commit(nil)
  2213  		_, nodeSize, _ := task.trieDb.Size()
  2214  
  2215  		if err := task.trieDb.Commit(root, false, 0); err != nil {
  2216  			logger.Error("Failed to persist account slots", "root", root.String(), "err", err)
  2217  		} else {
  2218  			s.accountBytes += nodeSize
  2219  		}
  2220  	}
  2221  	logger.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes)
  2222  }
  2223  
  2224  // OnAccounts is a callback method to invoke when a range of accounts are
  2225  // received from a remote peer.
  2226  func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
  2227  	size := common.StorageSize(len(hashes) * common.HashLength)
  2228  	for _, account := range accounts {
  2229  		size += common.StorageSize(len(account))
  2230  	}
  2231  	for _, node := range proof {
  2232  		size += common.StorageSize(len(node))
  2233  	}
  2234  	logger := peer.Log().NewWith("reqid", id)
  2235  	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)
  2236  
  2237  	// Whether or not the response is valid, we can mark the peer as idle and
  2238  	// notify the scheduler to assign a new task. If the response is invalid,
  2239  	// we'll drop the peer in a bit.
  2240  	s.lock.Lock()
  2241  	if _, ok := s.peers[peer.ID()]; ok {
  2242  		s.accountIdlers[peer.ID()] = struct{}{}
  2243  	}
  2244  	select {
  2245  	case s.update <- struct{}{}:
  2246  	default:
  2247  	}
  2248  	// Ensure the response is for a valid request
  2249  	req, ok := s.accountReqs[id]
  2250  	if !ok {
  2251  		// Request stale, perhaps the peer timed out but came through in the end
  2252  		logger.Warn("Unexpected account range packet")
  2253  		s.lock.Unlock()
  2254  		return nil
  2255  	}
  2256  	delete(s.accountReqs, id)
  2257  	s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
  2258  
  2259  	// Clean up the request timeout timer, we'll see how to proceed further based
  2260  	// on the actual delivered content
  2261  	if !req.timeout.Stop() {
  2262  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2263  		s.lock.Unlock()
  2264  		return nil
  2265  	}
  2266  	// Response is valid, but check if peer is signalling that it does not have
  2267  	// the requested data. For account range queries that means the state being
  2268  	// retrieved was either already pruned remotely, or the peer is not yet
  2269  	// synced to our head.
  2270  	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
  2271  		logger.Debug("Peer rejected account range request", "root", s.root)
  2272  		s.statelessPeers[peer.ID()] = struct{}{}
  2273  		s.lock.Unlock()
  2274  
  2275  		// Signal this request as failed, and ready for rescheduling
  2276  		s.scheduleRevertAccountRequest(req)
  2277  		return nil
  2278  	}
  2279  	root := s.root
  2280  	s.lock.Unlock()
  2281  
  2282  	// Reconstruct a partial trie from the response and verify it
  2283  	keys := make([][]byte, len(hashes))
  2284  	for i, key := range hashes {
  2285  		keys[i] = common.CopyBytes(key[:])
  2286  	}
  2287  	nodes := make(NodeList, len(proof))
  2288  	for i, node := range proof {
  2289  		nodes[i] = node
  2290  	}
  2291  	proofdb := nodes.NodeSet()
  2292  
  2293  	var end []byte
  2294  	if len(keys) > 0 {
  2295  		end = keys[len(keys)-1]
  2296  	}
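        	// VerifyRangeProof checks that keys/accounts form a contiguous slice of
        	// the trie rooted at 'root', bounded by the request origin and the last
        	// delivered key, using the supplied proof nodes; the returned flag
        	// reports whether more entries exist beyond the verified range.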
  2297  	cont, err := statedb.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
  2298  	if err != nil {
  2299  		logger.Warn("Account range failed proof", "err", err)
  2300  		// Signal this request as failed, and ready for rescheduling
  2301  		s.scheduleRevertAccountRequest(req)
  2302  		return err
  2303  	}
  2304  	accs := make([]account.Account, len(accounts))
  2305  	for i, accBytes := range accounts {
  2306  		serializer := account.NewAccountSerializer()
  2307  		if err := rlp.DecodeBytes(accBytes, serializer); err != nil {
  2308  			panic(err) // We created these blobs, we must be able to decode them
  2309  		}
  2310  		accs[i] = serializer.GetAccount()
  2311  	}
  2312  	response := &accountResponse{
  2313  		task:     req.task,
  2314  		hashes:   hashes,
  2315  		accounts: accs,
  2316  		cont:     cont,
  2317  	}
  2318  	select {
  2319  	case req.deliver <- response:
  2320  	case <-req.cancel:
  2321  	case <-req.stale:
  2322  	}
  2323  	return nil
  2324  }
  2325  
  2326  // OnByteCodes is a callback method to invoke when a batch of contract
  2327  // bytecodes are received from a remote peer.
  2328  func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2329  	s.lock.RLock()
  2330  	syncing := !s.snapped
  2331  	s.lock.RUnlock()
  2332  
  2333  	if syncing {
  2334  		return s.onByteCodes(peer, id, bytecodes)
  2335  	}
  2336  	return s.onHealByteCodes(peer, id, bytecodes)
  2337  }
  2338  
  2339  // onByteCodes is a callback method to invoke when a batch of contract
  2340  // bytecodes are received from a remote peer in the syncing phase.
  2341  func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2342  	var size common.StorageSize
  2343  	for _, code := range bytecodes {
  2344  		size += common.StorageSize(len(code))
  2345  	}
  2346  	logger := peer.Log().NewWith("reqid", id)
  2347  	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2348  
  2349  	// Whether or not the response is valid, we can mark the peer as idle and
  2350  	// notify the scheduler to assign a new task. If the response is invalid,
  2351  	// we'll drop the peer in a bit.
  2352  	s.lock.Lock()
  2353  	if _, ok := s.peers[peer.ID()]; ok {
  2354  		s.bytecodeIdlers[peer.ID()] = struct{}{}
  2355  	}
  2356  	select {
  2357  	case s.update <- struct{}{}:
  2358  	default:
  2359  	}
  2360  	// Ensure the response is for a valid request
  2361  	req, ok := s.bytecodeReqs[id]
  2362  	if !ok {
  2363  		// Request stale, perhaps the peer timed out but came through in the end
  2364  		logger.Warn("Unexpected bytecode packet")
  2365  		s.lock.Unlock()
  2366  		return nil
  2367  	}
  2368  	delete(s.bytecodeReqs, id)
  2369  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2370  
  2371  	// Clean up the request timeout timer, we'll see how to proceed further based
  2372  	// on the actual delivered content
  2373  	if !req.timeout.Stop() {
  2374  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2375  		s.lock.Unlock()
  2376  		return nil
  2377  	}
  2378  
  2379  	// Response is valid, but check if peer is signalling that it does not have
  2380  	// the requested data. For bytecode range queries that means the peer is not
  2381  	// yet synced.
  2382  	if len(bytecodes) == 0 {
  2383  		logger.Debug("Peer rejected bytecode request")
  2384  		s.statelessPeers[peer.ID()] = struct{}{}
  2385  		s.lock.Unlock()
  2386  
  2387  		// Signal this request as failed, and ready for rescheduling
  2388  		s.scheduleRevertBytecodeRequest(req)
  2389  		return nil
  2390  	}
  2391  	s.lock.Unlock()
  2392  
  2393  	// Cross reference the requested bytecodes with the response to find gaps
  2394  	// that the serving node is missing
  2395  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2396  	hash := make([]byte, 32)
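        	// KeccakState exposes Read, which squeezes output straight into the
        	// buffer and skips the copy that Sum would make. A minimal sketch of
        	// the hashing done in the loop below:
        	//
        	//	hasher.Reset()
        	//	hasher.Write(blob)
        	//	hasher.Read(hash) // hash now holds keccak256(blob)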
  2397  
  2398  	codes := make([][]byte, len(req.hashes))
  2399  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2400  		// Find the next hash that we've been served, leaving misses with nils
  2401  		hasher.Reset()
  2402  		hasher.Write(bytecodes[i])
  2403  		hasher.Read(hash)
  2404  
  2405  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2406  			j++
  2407  		}
  2408  		if j < len(req.hashes) {
  2409  			codes[j] = bytecodes[i]
  2410  			j++
  2411  			continue
  2412  		}
  2413  		// We've either run out of hashes, or got unrequested data
  2414  		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
  2415  		// Signal this request as failed, and ready for rescheduling
  2416  		s.scheduleRevertBytecodeRequest(req)
  2417  		return errors.New("unexpected bytecode")
  2418  	}
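        	// For illustration, with hypothetical values: if req.hashes = [A, B, C]
        	// and the peer served only code(A) and code(C), the loop leaves
        	// codes = [codeA, nil, codeC]; the nil gap is rescheduled later by
        	// processBytecodeResponse.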
  2419  	// Response validated, send it to the scheduler for filling
  2420  	response := &bytecodeResponse{
  2421  		task:   req.task,
  2422  		hashes: req.hashes,
  2423  		codes:  codes,
  2424  	}
  2425  	select {
  2426  	case req.deliver <- response:
  2427  	case <-req.cancel:
  2428  	case <-req.stale:
  2429  	}
  2430  	return nil
  2431  }
  2432  
  2433  // OnStorage is a callback method to invoke when ranges of storage slots
  2434  // are received from a remote peer.
  2435  func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
  2436  	// Gather some trace stats to aid in debugging issues
  2437  	var (
  2438  		hashCount int
  2439  		slotCount int
  2440  		size      common.StorageSize
  2441  	)
  2442  	for _, hashset := range hashes {
  2443  		size += common.StorageSize(common.HashLength * len(hashset))
  2444  		hashCount += len(hashset)
  2445  	}
  2446  	for _, slotset := range slots {
  2447  		for _, slot := range slotset {
  2448  			size += common.StorageSize(len(slot))
  2449  		}
  2450  		slotCount += len(slotset)
  2451  	}
  2452  	for _, node := range proof {
  2453  		size += common.StorageSize(len(node))
  2454  	}
  2455  	logger := peer.Log().NewWith("reqid", id)
  2456  	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)
  2457  
  2458  	// Whether or not the response is valid, we can mark the peer as idle and
  2459  	// notify the scheduler to assign a new task. If the response is invalid,
  2460  	// we'll drop the peer in a bit.
  2461  	s.lock.Lock()
  2462  	if _, ok := s.peers[peer.ID()]; ok {
  2463  		s.storageIdlers[peer.ID()] = struct{}{}
  2464  	}
  2465  	select {
  2466  	case s.update <- struct{}{}:
  2467  	default:
  2468  	}
  2469  	// Ensure the response is for a valid request
  2470  	req, ok := s.storageReqs[id]
  2471  	if !ok {
  2472  		// Request stale, perhaps the peer timed out but came through in the end
  2473  		logger.Warn("Unexpected storage ranges packet")
  2474  		s.lock.Unlock()
  2475  		return nil
  2476  	}
  2477  	delete(s.storageReqs, id)
  2478  	s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size))
  2479  
  2480  	// Clean up the request timeout timer, we'll see how to proceed further based
  2481  	// on the actual delivered content
  2482  	if !req.timeout.Stop() {
  2483  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2484  		s.lock.Unlock()
  2485  		return nil
  2486  	}
  2487  
  2488  	// Reject the response if the hash sets and slot sets don't match, or if the
  2489  	// peer sent more data than requested.
  2490  	if len(hashes) != len(slots) {
  2491  		s.lock.Unlock()
  2492  		s.scheduleRevertStorageRequest(req) // reschedule request
  2493  		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
  2494  		return errors.New("hash and slot set size mismatch")
  2495  	}
  2496  	if len(hashes) > len(req.accounts) {
  2497  		s.lock.Unlock()
  2498  		s.scheduleRevertStorageRequest(req) // reschedule request
  2499  		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
  2500  		return errors.New("hash set larger than requested")
  2501  	}
  2502  	// Response is valid, but check if peer is signalling that it does not have
  2503  	// the requested data. For storage range queries that means the state being
  2504  	// retrieved was either already pruned remotely, or the peer is not yet
  2505  	// synced to our head.
  2506  	if len(hashes) == 0 {
  2507  		logger.Debug("Peer rejected storage request")
  2508  		s.statelessPeers[peer.ID()] = struct{}{}
  2509  		s.lock.Unlock()
  2510  		s.scheduleRevertStorageRequest(req) // reschedule request
  2511  		return nil
  2512  	}
  2513  	s.lock.Unlock()
  2514  
  2515  	// Reconstruct the partial tries from the response and verify them
  2516  	var cont bool
  2517  
  2518  	for i := 0; i < len(hashes); i++ {
  2519  		// Convert the keys and proofs into an internal format
  2520  		keys := make([][]byte, len(hashes[i]))
  2521  		for j, key := range hashes[i] {
  2522  			keys[j] = common.CopyBytes(key[:])
  2523  		}
  2524  		nodes := make(NodeList, 0, len(proof))
  2525  		if i == len(hashes)-1 {
  2526  			for _, node := range proof {
  2527  				nodes = append(nodes, node)
  2528  			}
  2529  		}
  2530  		var err error
  2531  		if len(nodes) == 0 {
  2532  			// No proof has been attached, the response must cover the entire key
  2533  			// space and hash to the origin root.
  2534  			_, err = statedb.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
  2535  			if err != nil {
  2536  				s.scheduleRevertStorageRequest(req) // reschedule request
  2537  				logger.Warn("Storage slots failed proof", "err", err)
  2538  				return err
  2539  			}
  2540  		} else {
  2541  			// A proof was attached, the response is only partial, check that the
  2542  			// returned data is indeed part of the storage trie
  2543  			proofdb := nodes.NodeSet()
  2544  
  2545  			var end []byte
  2546  			if len(keys) > 0 {
  2547  				end = keys[len(keys)-1]
  2548  			}
  2549  			cont, err = statedb.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
  2550  			if err != nil {
  2551  				s.scheduleRevertStorageRequest(req) // reschedule request
  2552  				logger.Warn("Storage range failed proof", "err", err)
  2553  				return err
  2554  			}
  2555  		}
  2556  	}
  2557  	// Partial tries reconstructed, send them to the scheduler for storage filling
  2558  	response := &storageResponse{
  2559  		mainTask: req.mainTask,
  2560  		subTask:  req.subTask,
  2561  		accounts: req.accounts,
  2562  		roots:    req.roots,
  2563  		hashes:   hashes,
  2564  		slots:    slots,
  2565  		cont:     cont,
  2566  	}
  2567  	select {
  2568  	case req.deliver <- response:
  2569  	case <-req.cancel:
  2570  	case <-req.stale:
  2571  	}
  2572  	return nil
  2573  }
  2574  
  2575  // OnTrieNodes is a callback method to invoke when a batch of trie nodes
  2576  // are received from a remote peer.
  2577  func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error {
  2578  	var size common.StorageSize
  2579  	for _, node := range trienodes {
  2580  		size += common.StorageSize(len(node))
  2581  	}
  2582  	logger := peer.Log().NewWith("reqid", id)
  2583  	logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size)
  2584  
  2585  	// Whether or not the response is valid, we can mark the peer as idle and
  2586  	// notify the scheduler to assign a new task. If the response is invalid,
  2587  	// we'll drop the peer in a bit.
  2588  	s.lock.Lock()
  2589  	if _, ok := s.peers[peer.ID()]; ok {
  2590  		s.trienodeHealIdlers[peer.ID()] = struct{}{}
  2591  	}
  2592  	select {
  2593  	case s.update <- struct{}{}:
  2594  	default:
  2595  	}
  2596  	// Ensure the response is for a valid request
  2597  	req, ok := s.trienodeHealReqs[id]
  2598  	if !ok {
  2599  		// Request stale, perhaps the peer timed out but came through in the end
  2600  		logger.Warn("Unexpected trienode heal packet")
  2601  		s.lock.Unlock()
  2602  		return nil
  2603  	}
  2604  	delete(s.trienodeHealReqs, id)
  2605  	s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
  2606  
  2607  	// Clean up the request timeout timer, we'll see how to proceed further based
  2608  	// on the actual delivered content
  2609  	if !req.timeout.Stop() {
  2610  		// The timeout is already triggered, and this request will be reverted+rescheduled
  2611  		s.lock.Unlock()
  2612  		return nil
  2613  	}
  2614  
  2615  	// Response is valid, but check if peer is signalling that it does not have
  2616  	// the requested data. For trienode queries that means the peer is not
  2617  	// yet synced.
  2618  	if len(trienodes) == 0 {
  2619  		logger.Debug("Peer rejected trienode heal request")
  2620  		s.statelessPeers[peer.ID()] = struct{}{}
  2621  		s.lock.Unlock()
  2622  
  2623  		// Signal this request as failed, and ready for rescheduling
  2624  		s.scheduleRevertTrienodeHealRequest(req)
  2625  		return nil
  2626  	}
  2627  	s.lock.Unlock()
  2628  
  2629  	// Cross reference the requested trienodes with the response to find gaps
  2630  	// that the serving node is missing
  2631  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2632  	hash := make([]byte, 32)
  2633  
  2634  	nodes := make([][]byte, len(req.hashes))
  2635  	for i, j := 0, 0; i < len(trienodes); i++ {
  2636  		// Find the next hash that we've been served, leaving misses with nils
  2637  		hasher.Reset()
  2638  		hasher.Write(trienodes[i])
  2639  		hasher.Read(hash)
  2640  
  2641  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2642  			j++
  2643  		}
  2644  		if j < len(req.hashes) {
  2645  			nodes[j] = trienodes[i]
  2646  			j++
  2647  			continue
  2648  		}
  2649  		// We've either run out of hashes, or been served unrequested data
  2650  		logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
  2651  		// Signal this request as failed, and ready for rescheduling
  2652  		s.scheduleRevertTrienodeHealRequest(req)
  2653  		return errors.New("unexpected healing trienode")
  2654  	}
  2655  	// Response validated, send it to the scheduler for filling
  2656  	response := &trienodeHealResponse{
  2657  		task:   req.task,
  2658  		hashes: req.hashes,
  2659  		paths:  req.paths,
  2660  		nodes:  nodes,
  2661  	}
  2662  	select {
  2663  	case req.deliver <- response:
  2664  	case <-req.cancel:
  2665  	case <-req.stale:
  2666  	}
  2667  	return nil
  2668  }
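
// matchServedSketch distills the cross-referencing loop above: served blobs
// must follow the request order (gaps allowed), so one forward cursor over
// the requested hashes pairs them up in a single pass, leaving nil where the
// peer skipped an item. A sketch only; the real loop reuses a single hasher
// and scratch buffer to avoid a per-item allocation.
func matchServedSketch(reqHashes []common.Hash, served [][]byte) ([][]byte, error) {
	out := make([][]byte, len(reqHashes))
	hasher := sha3.NewLegacyKeccak256()
	for i, j := 0, 0; i < len(served); i++ {
		hasher.Reset()
		hasher.Write(served[i])
		hash := hasher.Sum(nil)

		// Advance past requested hashes the peer chose not to serve
		for j < len(reqHashes) && !bytes.Equal(hash, reqHashes[j][:]) {
			j++
		}
		if j >= len(reqHashes) {
			// Ran past the request list: the blob was never asked for
			return nil, errors.New("unrequested blob in response")
		}
		out[j] = served[i]
		j++
	}
	return out, nil
}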
  2669  
  2670  // onHealByteCodes is a callback method to invoke when a batch of contract
  2671  // bytecodes is received from a remote peer in the healing phase.
  2672  func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
  2673  	var size common.StorageSize
  2674  	for _, code := range bytecodes {
  2675  		size += common.StorageSize(len(code))
  2676  	}
  2677  	logger := peer.Log().NewWith("reqid", id)
  2678  	logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
  2679  
  2680  	// Whether or not the response is valid, we can mark the peer as idle and
  2681  	// notify the scheduler to assign a new task. If the response is invalid,
  2682  	// we'll drop the peer in a bit.
  2683  	s.lock.Lock()
  2684  	if _, ok := s.peers[peer.ID()]; ok {
  2685  		s.bytecodeHealIdlers[peer.ID()] = struct{}{}
  2686  	}
  2687  	select {
  2688  	case s.update <- struct{}{}:
  2689  	default:
  2690  	}
  2691  	// Ensure the response is for a valid request
  2692  	req, ok := s.bytecodeHealReqs[id]
  2693  	if !ok {
  2694  		// Request stale, perhaps the peer timed out but came through in the end
  2695  		logger.Warn("Unexpected bytecode heal packet")
  2696  		s.lock.Unlock()
  2697  		return nil
  2698  	}
  2699  	delete(s.bytecodeHealReqs, id)
  2700  	s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
  2701  
  2702  	// Clean up the request timeout timer, we'll see how to proceed further based
  2703  	// on the actual delivered content
  2704  	if !req.timeout.Stop() {
  2705  		// The timeout has already fired, and this request will be reverted and rescheduled
  2706  		s.lock.Unlock()
  2707  		return nil
  2708  	}
  2709  
  2710  	// Response is valid, but check if peer is signalling that it does not have
  2711  	// the requested data. For bytecode heal queries that means the peer is not
  2712  	// yet synced.
  2713  	if len(bytecodes) == 0 {
  2714  		logger.Debug("Peer rejected bytecode heal request")
  2715  		s.statelessPeers[peer.ID()] = struct{}{}
  2716  		s.lock.Unlock()
  2717  
  2718  		// Signal this request as failed, and ready for rescheduling
  2719  		s.scheduleRevertBytecodeHealRequest(req)
  2720  		return nil
  2721  	}
  2722  	s.lock.Unlock()
  2723  
  2724  	// Cross reference the requested bytecodes with the response to find gaps
  2725  	// that the serving node is missing
  2726  	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
  2727  	hash := make([]byte, 32)
  2728  
  2729  	codes := make([][]byte, len(req.hashes))
  2730  	for i, j := 0, 0; i < len(bytecodes); i++ {
  2731  		// Find the next hash that we've been served, leaving misses with nils
  2732  		hasher.Reset()
  2733  		hasher.Write(bytecodes[i])
  2734  		hasher.Read(hash)
  2735  
  2736  		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
  2737  			j++
  2738  		}
  2739  		if j < len(req.hashes) {
  2740  			codes[j] = bytecodes[i]
  2741  			j++
  2742  			continue
  2743  		}
  2744  		// We've either run out of hashes, or been served unrequested data
  2745  		logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
  2746  		// Signal this request as failed, and ready for rescheduling
  2747  		s.scheduleRevertBytecodeHealRequest(req)
  2748  		return errors.New("unexpected healing bytecode")
  2749  	}
  2750  	// Response validated, send it to the scheduler for filling
  2751  	response := &bytecodeHealResponse{
  2752  		task:   req.task,
  2753  		hashes: req.hashes,
  2754  		codes:  codes,
  2755  	}
  2756  	select {
  2757  	case req.deliver <- response:
  2758  	case <-req.cancel:
  2759  	case <-req.stale:
  2760  	}
  2761  	return nil
  2762  }
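
// digestSketch shows why both heal handlers assert the sha3 hasher to
// statedb.KeccakState: Read squeezes the digest into a caller-owned buffer,
// so one hasher and one 32-byte scratch slice serve an entire batch, unlike
// Sum, which allocates a fresh slice on every call. Illustrative only.
func digestSketch(blobs [][]byte) []common.Hash {
	hasher := sha3.NewLegacyKeccak256().(statedb.KeccakState)
	buf := make([]byte, 32)

	hashes := make([]common.Hash, len(blobs))
	for i, blob := range blobs {
		hasher.Reset()
		hasher.Write(blob)
		hasher.Read(buf) // digest written into the reused buffer
		hashes[i] = common.BytesToHash(buf)
	}
	return hashes
}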
  2763  
  2764  // onHealState is a callback method to invoke when a flat state (account
  2765  // or storage slot) is downloaded during the healing stage. The flat states
  2766  // can be persisted blindly and fixed up later in the generation stage.
  2767  // Note it is not concurrency-safe; callers must synchronize access externally.
  2768  func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
  2769  	if len(paths) == 1 {
  2770  		s.stateWriter.WriteAccountSnapshot(common.BytesToHash(paths[0]), value)
  2771  		s.accountHealed += 1
  2772  		s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(value))
  2773  	}
  2774  	if len(paths) == 2 {
  2775  		s.stateWriter.WriteStorageSnapshot(common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
  2776  		s.storageHealed += 1
  2777  		s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
  2778  	}
  2779  	if s.stateWriter.ValueSize() > database.IdealBatchSize {
  2780  		s.stateWriter.Write() // It's fine to ignore the error here
  2781  		s.stateWriter.Reset()
  2782  	}
  2783  	return nil
  2784  }
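
// flushSketch isolates the write-coalescing rule at the end of onHealState:
// healed states are buffered in the batch writer and only pushed to disk once
// the accumulated value size crosses database.IdealBatchSize, trading a
// little memory for far fewer database commits. A rough sketch against a
// generic database.Batch; illustrative only and not called anywhere.
func flushSketch(batch database.Batch, key, value []byte) {
	batch.Put(key, value) // buffered in memory, not yet persisted
	if batch.ValueSize() > database.IdealBatchSize {
		batch.Write() // it's fine to ignore the error here, as in onHealState
		batch.Reset()
	}
}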
  2785  
  2786  // hashSpace is the total size of the 256-bit hash space for accounts.
  2787  var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
  2788  
  2789  // report calculates various status reports and provides them to the user.
  2790  func (s *Syncer) report(force bool) {
  2791  	if len(s.tasks) > 0 {
  2792  		s.reportSyncProgress(force)
  2793  		return
  2794  	}
  2795  	s.reportHealProgress(force)
  2796  }
  2797  
  2798  // reportSyncProgress calculates various status reports and provides them to the user.
  2799  func (s *Syncer) reportSyncProgress(force bool) {
  2800  	// Don't report all the events, just occasionally
  2801  	if !force && time.Since(s.logTime) < 8*time.Second {
  2802  		return
  2803  	}
  2804  	// Don't report anything until we have made meaningful progress
  2805  	synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
  2806  	if synced == 0 {
  2807  		return
  2808  	}
  2809  	accountGaps := new(big.Int)
  2810  	for _, task := range s.tasks {
  2811  		accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
  2812  	}
  2813  	accountFills := new(big.Int).Sub(hashSpace, accountGaps)
  2814  	if accountFills.BitLen() == 0 {
  2815  		return
  2816  	}
  2817  	s.logTime = time.Now()
  2818  	estBytes := float64(new(big.Int).Div(
  2819  		new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
  2820  		accountFills,
  2821  	).Uint64())
  2822  	// Don't report anything until we have made meaningful progress
  2823  	if estBytes < 1.0 {
  2824  		return
  2825  	}
  2826  	elapsed := time.Since(s.startTime)
  2827  	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
  2828  
  2829  	// Create a mega progress report
  2830  	var (
  2831  		progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
  2832  		accounts = fmt.Sprintf("%v@%v", s.accountSynced, s.accountBytes.TerminalString())
  2833  		storage  = fmt.Sprintf("%v@%v", s.storageSynced, s.storageBytes.TerminalString())
  2834  		bytecode = fmt.Sprintf("%v@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString())
  2835  	)
  2836  	logger.Info("State sync in progress", "synced", progress, "state", synced,
  2837  		"accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
  2838  }
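
// estimateSketch walks the extrapolation above with concrete numbers: the
// synced byte count is scaled up by how much of the 2^256 account hash space
// the completed ranges cover. With 1 GiB downloaded and a quarter of the
// hash space still gapped (accountFills = 3/4 of hashSpace), the estimated
// total is 1 GiB * 4/3 and progress reads 75%. Illustrative only.
func estimateSketch() {
	synced := new(big.Int).SetUint64(1 << 30) // 1 GiB downloaded so far
	accountGaps := new(big.Int).Div(hashSpace, big.NewInt(4))
	accountFills := new(big.Int).Sub(hashSpace, accountGaps)

	// estBytes = synced * hashSpace / accountFills ~= 1.33 GiB
	estBytes := new(big.Int).Div(new(big.Int).Mul(synced, hashSpace), accountFills)
	progress := fmt.Sprintf("%.2f%%", float64(synced.Uint64())*100/float64(estBytes.Uint64()))
	fmt.Println("estimated total:", estBytes, "progress:", progress) // 75.00%
}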
  2839  
  2840  // reportHealProgress calculates various status reports and provides them to the user.
  2841  func (s *Syncer) reportHealProgress(force bool) {
  2842  	// Don't report all the events, just occasionally
  2843  	if !force && time.Since(s.logTime) < 8*time.Second {
  2844  		return
  2845  	}
  2846  	s.logTime = time.Now()
  2847  
  2848  	// Create a mega progress report
  2849  	var (
  2850  		trienode = fmt.Sprintf("%v@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString())
  2851  		bytecode = fmt.Sprintf("%v@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString())
  2852  		accounts = fmt.Sprintf("%v@%v", s.accountHealed, s.accountHealedBytes.TerminalString())
  2853  		storage  = fmt.Sprintf("%v@%v", s.storageHealed, s.storageHealedBytes.TerminalString())
  2854  	)
  2855  	logger.Info("State heal in progress", "accounts", accounts, "slots", storage,
  2856  		"codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
  2857  }
  2858  
  2859  // estimateRemainingSlots tries to determine roughly how many slots are left in
  2860  // a contract storage, based on the number of keys and the last hash. This method
  2861  // assumes that the hashes are lexicographically ordered and evenly distributed.
  2862  func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
  2863  	if last == (common.Hash{}) {
  2864  		return 0, errors.New("last hash empty")
  2865  	}
  2866  	space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
  2867  	space.Div(space, last.Big())
  2868  	if !space.IsUint64() {
  2869  		// Gigantic address space estimate, probably due to too few or malicious slots
  2870  		return 0, errors.New("too few slots for estimation")
  2871  	}
  2872  	return space.Uint64() - uint64(hashes), nil
  2873  }
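
// remainingSketch plugs numbers into estimateRemainingSlots: assuming
// uniformly distributed slot hashes, seeing `hashes` keys below `last`
// implies roughly hashes * 2^256 / last slots in total. With 100 hashes seen
// and last at the midpoint of the hash space, the total comes out near 200,
// leaving about 100 slots (99 after integer truncation) still to fetch.
// Illustrative only.
func remainingSketch() {
	mid := new(big.Int).Lsh(common.Big1, 255) // midpoint of the hash space
	remaining, err := estimateRemainingSlots(100, common.BigToHash(mid))
	fmt.Println(remaining, err) // 99 <nil>
}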
  2874  
  2875  // capacitySort implements sort.Interface, allowing sorting by peer message
  2876  // throughput. Note, callers should use sort.Reverse to get the desired effect
  2877  // of highest capacity being at the front.
  2878  type capacitySort struct {
  2879  	ids  []string
  2880  	caps []int
  2881  }
  2882  
  2883  func (s *capacitySort) Len() int {
  2884  	return len(s.ids)
  2885  }
  2886  
  2887  func (s *capacitySort) Less(i, j int) bool {
  2888  	return s.caps[i] < s.caps[j]
  2889  }
  2890  
  2891  func (s *capacitySort) Swap(i, j int) {
  2892  	s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
  2893  	s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
  2894  }
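
// sortSketch shows the intended use of capacitySort: sort.Sort alone would
// put the weakest peer first, so callers wrap it in sort.Reverse to assign
// work to the highest-capacity peers first. Illustrative only.
func sortSketch() {
	s := &capacitySort{
		ids:  []string{"peer-a", "peer-b", "peer-c"},
		caps: []int{10, 30, 20},
	}
	sort.Sort(sort.Reverse(s))
	fmt.Println(s.ids) // [peer-b peer-c peer-a]
}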