github.com/carter-ya/go-ethereum@v0.0.0-20230628080049-d2309be3983b/eth/protocols/snap/sync.go 1 // Copyright 2020 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package snap 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "errors" 23 "fmt" 24 gomath "math" 25 "math/big" 26 "math/rand" 27 "sort" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 "github.com/ethereum/go-ethereum/common" 33 "github.com/ethereum/go-ethereum/common/math" 34 "github.com/ethereum/go-ethereum/core/rawdb" 35 "github.com/ethereum/go-ethereum/core/state" 36 "github.com/ethereum/go-ethereum/core/state/snapshot" 37 "github.com/ethereum/go-ethereum/core/types" 38 "github.com/ethereum/go-ethereum/crypto" 39 "github.com/ethereum/go-ethereum/ethdb" 40 "github.com/ethereum/go-ethereum/event" 41 "github.com/ethereum/go-ethereum/light" 42 "github.com/ethereum/go-ethereum/log" 43 "github.com/ethereum/go-ethereum/p2p/msgrate" 44 "github.com/ethereum/go-ethereum/rlp" 45 "github.com/ethereum/go-ethereum/trie" 46 "golang.org/x/crypto/sha3" 47 ) 48 49 var ( 50 // emptyRoot is the known root hash of an empty trie. 51 emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") 52 53 // emptyCode is the known hash of the empty EVM bytecode. 54 emptyCode = crypto.Keccak256Hash(nil) 55 ) 56 57 const ( 58 // minRequestSize is the minimum number of bytes to request from a remote peer. 59 // This number is used as the low cap for account and storage range requests. 60 // Bytecode and trienode are limited inherently by item count (1). 61 minRequestSize = 64 * 1024 62 63 // maxRequestSize is the maximum number of bytes to request from a remote peer. 64 // This number is used as the high cap for account and storage range requests. 65 // Bytecode and trienode are limited more explicitly by the caps below. 66 maxRequestSize = 512 * 1024 67 68 // maxCodeRequestCount is the maximum number of bytecode blobs to request in a 69 // single query. If this number is too low, we're not filling responses fully 70 // and waste round trip times. If it's too high, we're capping responses and 71 // waste bandwidth. 72 // 73 // Deployed bytecodes are currently capped at 24KB, so the minimum request 74 // size should be maxRequestSize / 24K. Assuming that most contracts do not 75 // come close to that, requesting 4x should be a good approximation. 76 maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 77 78 // maxTrieRequestCount is the maximum number of trie node blobs to request in 79 // a single query. If this number is too low, we're not filling responses fully 80 // and waste round trip times. If it's too high, we're capping responses and 81 // waste bandwidth.
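// With maxRequestSize at 512KiB and the divisor of 512 below (presumably a rough average trie node size), this works out to a cap of 1024 trie nodes per request.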
82 maxTrieRequestCount = maxRequestSize / 512 83 84 // trienodeHealRateMeasurementImpact is the impact a single measurement has on 85 // the local node's trienode processing capacity. A value closer to 0 reacts 86 // slower to sudden changes, but it is also more stable against temporary hiccups. 87 trienodeHealRateMeasurementImpact = 0.005 88 89 // minTrienodeHealThrottle is the minimum divisor for throttling trie node 90 // heal requests to avoid overloading the local node and excessively expanding 91 // the state trie breadth-wise. 92 minTrienodeHealThrottle = 1 93 94 // maxTrienodeHealThrottle is the maximum divisor for throttling trie node 95 // heal requests to avoid overloading the local node and excessively expanding 96 // the state trie breadth-wise. 97 maxTrienodeHealThrottle = maxTrieRequestCount 98 99 // trienodeHealThrottleIncrease is the multiplier for the throttle when the 100 // rate of arriving data is higher than the rate of processing it. 101 trienodeHealThrottleIncrease = 1.33 102 103 // trienodeHealThrottleDecrease is the divisor for the throttle when the 104 // rate of arriving data is lower than the rate of processing it. 105 trienodeHealThrottleDecrease = 1.25 106 ) 107 108 var ( 109 // accountConcurrency is the number of chunks to split the account trie into 110 // to allow concurrent retrievals. 111 accountConcurrency = 16 112 113 // storageConcurrency is the number of chunks to split a large contract 114 // storage trie into to allow concurrent retrievals. 115 storageConcurrency = 16 116 ) 117 118 // ErrCancelled is returned from snap syncing if the operation was prematurely 119 // terminated. 120 var ErrCancelled = errors.New("sync cancelled") 121 122 // accountRequest tracks a pending account range request to ensure responses are 123 // to actual requests and to validate any security constraints. 124 // 125 // Concurrency note: account requests and responses are handled concurrently from 126 // the main runloop to allow Merkle proof verifications on the peer's thread and 127 // to drop on invalid response. The request struct must contain all the data to 128 // construct the response without accessing runloop internals (i.e. task). That 129 // is only included to allow the runloop to match a response to the task being 130 // synced without having yet another set of maps. 131 type accountRequest struct { 132 peer string // Peer to which this request is assigned 133 id uint64 // Request ID of this request 134 time time.Time // Timestamp when the request was sent 135 136 deliver chan *accountResponse // Channel to deliver successful response on 137 revert chan *accountRequest // Channel to deliver request failure on 138 cancel chan struct{} // Channel to track sync cancellation 139 timeout *time.Timer // Timer to track delivery timeout 140 stale chan struct{} // Channel to signal the request was dropped 141 142 origin common.Hash // First account requested to allow continuation checks 143 limit common.Hash // Last account requested to allow non-overlapping chunking 144 145 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 146 } 147 148 // accountResponse is an already Merkle-verified remote response to an account 149 // range request. It contains the subtrie for the requested account range and 150 // the database that's going to be filled with the internal nodes on commit.
151 type accountResponse struct { 152 task *accountTask // Task which this request is filling 153 154 hashes []common.Hash // Account hashes in the returned range 155 accounts []*types.StateAccount // Expanded accounts in the returned range 156 157 cont bool // Whether the account range has a continuation 158 } 159 160 // bytecodeRequest tracks a pending bytecode request to ensure responses are to 161 // actual requests and to validate any security constraints. 162 // 163 // Concurrency note: bytecode requests and responses are handled concurrently from 164 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 165 // to drop on invalid response. The request struct must contain all the data to 166 // construct the response without accessing runloop internals (i.e. task). That 167 // is only included to allow the runloop to match a response to the task being 168 // synced without having yet another set of maps. 169 type bytecodeRequest struct { 170 peer string // Peer to which this request is assigned 171 id uint64 // Request ID of this request 172 time time.Time // Timestamp when the request was sent 173 174 deliver chan *bytecodeResponse // Channel to deliver successful response on 175 revert chan *bytecodeRequest // Channel to deliver request failure on 176 cancel chan struct{} // Channel to track sync cancellation 177 timeout *time.Timer // Timer to track delivery timeout 178 stale chan struct{} // Channel to signal the request was dropped 179 180 hashes []common.Hash // Bytecode hashes to validate responses 181 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 182 } 183 184 // bytecodeResponse is an already verified remote response to a bytecode request. 185 type bytecodeResponse struct { 186 task *accountTask // Task which this request is filling 187 188 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 189 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 190 } 191 192 // storageRequest tracks a pending storage ranges request to ensure responses are 193 // to actual requests and to validate any security constraints. 194 // 195 // Concurrency note: storage requests and responses are handled concurrently from 196 // the main runloop to allow Merkle proof verifications on the peer's thread and 197 // to drop on invalid response. The request struct must contain all the data to 198 // construct the response without accessing runloop internals (i.e. tasks). That 199 // is only included to allow the runloop to match a response to the task being 200 // synced without having yet another set of maps. 
201 type storageRequest struct { 202 peer string // Peer to which this request is assigned 203 id uint64 // Request ID of this request 204 time time.Time // Timestamp when the request was sent 205 206 deliver chan *storageResponse // Channel to deliver successful response on 207 revert chan *storageRequest // Channel to deliver request failure on 208 cancel chan struct{} // Channel to track sync cancellation 209 timeout *time.Timer // Timer to track delivery timeout 210 stale chan struct{} // Channel to signal the request was dropped 211 212 accounts []common.Hash // Account hashes to validate responses 213 roots []common.Hash // Storage roots to validate responses 214 215 origin common.Hash // First storage slot requested to allow continuation checks 216 limit common.Hash // Last storage slot requested to allow non-overlapping chunking 217 218 mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!) 219 subTask *storageTask // Task which this response is filling (only access fields through the runloop!!) 220 } 221 222 // storageResponse is an already Merkle-verified remote response to a storage 223 // range request. It contains the subtries for the requested storage ranges and 224 // the databases that's going to be filled with the internal nodes on commit. 225 type storageResponse struct { 226 mainTask *accountTask // Task which this response belongs to 227 subTask *storageTask // Task which this response is filling 228 229 accounts []common.Hash // Account hashes requested, may be only partially filled 230 roots []common.Hash // Storage roots requested, may be only partially filled 231 232 hashes [][]common.Hash // Storage slot hashes in the returned range 233 slots [][][]byte // Storage slot values in the returned range 234 235 cont bool // Whether the last storage range has a continuation 236 } 237 238 // trienodeHealRequest tracks a pending state trie request to ensure responses 239 // are to actual requests and to validate any security constraints. 240 // 241 // Concurrency note: trie node requests and responses are handled concurrently from 242 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 243 // to drop on invalid response. The request struct must contain all the data to 244 // construct the response without accessing runloop internals (i.e. task). That 245 // is only included to allow the runloop to match a response to the task being 246 // synced without having yet another set of maps. 247 type trienodeHealRequest struct { 248 peer string // Peer to which this request is assigned 249 id uint64 // Request ID of this request 250 time time.Time // Timestamp when the request was sent 251 252 deliver chan *trienodeHealResponse // Channel to deliver successful response on 253 revert chan *trienodeHealRequest // Channel to deliver request failure on 254 cancel chan struct{} // Channel to track sync cancellation 255 timeout *time.Timer // Timer to track delivery timeout 256 stale chan struct{} // Channel to signal the request was dropped 257 258 paths []string // Trie node paths for identifying trie node 259 hashes []common.Hash // Trie node hashes to validate responses 260 261 task *healTask // Task which this request is filling (only access fields through the runloop!!) 262 } 263 264 // trienodeHealResponse is an already verified remote response to a trie node request. 
265 type trienodeHealResponse struct { 266 task *healTask // Task which this request is filling 267 268 paths []string // Paths of the trie nodes 269 hashes []common.Hash // Hashes of the trie nodes to avoid double hashing 270 nodes [][]byte // Actual trie nodes to store into the database (nil = missing) 271 } 272 273 // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to 274 // actual requests and to validate any security constraints. 275 // 276 // Concurrency note: bytecode requests and responses are handled concurrently from 277 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 278 // to drop on invalid response. The request struct must contain all the data to 279 // construct the response without accessing runloop internals (i.e. task). That 280 // is only included to allow the runloop to match a response to the task being 281 // synced without having yet another set of maps. 282 type bytecodeHealRequest struct { 283 peer string // Peer to which this request is assigned 284 id uint64 // Request ID of this request 285 time time.Time // Timestamp when the request was sent 286 287 deliver chan *bytecodeHealResponse // Channel to deliver successful response on 288 revert chan *bytecodeHealRequest // Channel to deliver request failure on 289 cancel chan struct{} // Channel to track sync cancellation 290 timeout *time.Timer // Timer to track delivery timeout 291 stale chan struct{} // Channel to signal the request was dropped 292 293 hashes []common.Hash // Bytecode hashes to validate responses 294 task *healTask // Task which this request is filling (only access fields through the runloop!!) 295 } 296 297 // bytecodeHealResponse is an already verified remote response to a bytecode request. 298 type bytecodeHealResponse struct { 299 task *healTask // Task which this request is filling 300 301 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 302 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 303 } 304 305 // accountTask represents the sync task for a chunk of the account snapshot. 306 type accountTask struct { 307 // These fields get serialized to leveldb on shutdown 308 Next common.Hash // Next account to sync in this interval 309 Last common.Hash // Last account to sync in this interval 310 SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts 311 312 // These fields are internals used during runtime 313 req *accountRequest // Pending request to fill this task 314 res *accountResponse // Validated response filling this task 315 pend int // Number of pending subtasks for this round 316 317 needCode []bool // Flags whether the filling accounts need code retrieval 318 needState []bool // Flags whether the filling accounts need storage retrieval 319 needHeal []bool // Flags whether the filling accounts' state was chunked and needs healing 320 321 codeTasks map[common.Hash]struct{} // Code hashes that need retrieval 322 stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval 323 324 genBatch ethdb.Batch // Batch used by the node generator 325 genTrie *trie.StackTrie // Node generator from storage slots 326 327 done bool // Flag whether the task can be removed 328 } 329 330 // storageTask represents the sync task for a chunk of the storage snapshot.
331 type storageTask struct { 332 Next common.Hash // Next account to sync in this interval 333 Last common.Hash // Last account to sync in this interval 334 335 // These fields are internals used during runtime 336 root common.Hash // Storage root hash for this instance 337 req *storageRequest // Pending request to fill this task 338 339 genBatch ethdb.Batch // Batch used by the node generator 340 genTrie *trie.StackTrie // Node generator from storage slots 341 342 done bool // Flag whether the task can be removed 343 } 344 345 // healTask represents the sync task for healing the snap-synced chunk boundaries. 346 type healTask struct { 347 scheduler *trie.Sync // State trie sync scheduler defining the tasks 348 349 trieTasks map[string]common.Hash // Set of trie node tasks currently queued for retrieval, indexed by node path 350 codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash 351 } 352 353 // SyncProgress is a database entry to allow suspending and resuming a snapshot state 354 // sync. Opposed to full and fast sync, there is no way to restart a suspended 355 // snap sync without prior knowledge of the suspension point. 356 type SyncProgress struct { 357 Tasks []*accountTask // The suspended account tasks (contract tasks within) 358 359 // Status report during syncing phase 360 AccountSynced uint64 // Number of accounts downloaded 361 AccountBytes common.StorageSize // Number of account trie bytes persisted to disk 362 BytecodeSynced uint64 // Number of bytecodes downloaded 363 BytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 364 StorageSynced uint64 // Number of storage slots downloaded 365 StorageBytes common.StorageSize // Number of storage trie bytes persisted to disk 366 367 // Status report during healing phase 368 TrienodeHealSynced uint64 // Number of state trie nodes downloaded 369 TrienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 370 BytecodeHealSynced uint64 // Number of bytecodes downloaded 371 BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 372 } 373 374 // SyncPending is analogous to SyncProgress, but it's used to report on pending 375 // ephemeral sync progress that doesn't get persisted into the database. 376 type SyncPending struct { 377 TrienodeHeal uint64 // Number of state trie nodes pending 378 BytecodeHeal uint64 // Number of bytecodes pending 379 } 380 381 // SyncPeer abstracts out the methods required for a peer to be synced against 382 // with the goal of allowing the construction of mock peers without the full 383 // blown networking. 384 type SyncPeer interface { 385 // ID retrieves the peer's unique identifier. 386 ID() string 387 388 // RequestAccountRange fetches a batch of accounts rooted in a specific account 389 // trie, starting with the origin. 390 RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error 391 392 // RequestStorageRanges fetches a batch of storage slots belonging to one or 393 // more accounts. If slots from only one account is requested, an origin marker 394 // may also be used to retrieve from there. 395 RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error 396 397 // RequestByteCodes fetches a batch of bytecodes by hash. 
398 RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error 399 400 // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in 401 // a specific state trie. 402 RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error 403 404 // Log retrieves the peer's own contextual logger. 405 Log() log.Logger 406 } 407 408 // Syncer is an Ethereum account and storage trie syncer based on snapshots and 409 // the snap protocol. It's purpose is to download all the accounts and storage 410 // slots from remote peers and reassemble chunks of the state trie, on top of 411 // which a state sync can be run to fix any gaps / overlaps. 412 // 413 // Every network request has a variety of failure events: 414 // - The peer disconnects after task assignment, failing to send the request 415 // - The peer disconnects after sending the request, before delivering on it 416 // - The peer remains connected, but does not deliver a response in time 417 // - The peer delivers a stale response after a previous timeout 418 // - The peer delivers a refusal to serve the requested state 419 type Syncer struct { 420 db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) 421 422 root common.Hash // Current state trie root being synced 423 tasks []*accountTask // Current account task set being synced 424 snapped bool // Flag to signal that snap phase is done 425 healer *healTask // Current state healing task being executed 426 update chan struct{} // Notification channel for possible sync progression 427 428 peers map[string]SyncPeer // Currently active peers to download from 429 peerJoin *event.Feed // Event feed to react to peers joining 430 peerDrop *event.Feed // Event feed to react to peers dropping 431 rates *msgrate.Trackers // Message throughput rates for peers 432 433 // Request tracking during syncing phase 434 statelessPeers map[string]struct{} // Peers that failed to deliver state data 435 accountIdlers map[string]struct{} // Peers that aren't serving account requests 436 bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests 437 storageIdlers map[string]struct{} // Peers that aren't serving storage requests 438 439 accountReqs map[uint64]*accountRequest // Account requests currently running 440 bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running 441 storageReqs map[uint64]*storageRequest // Storage requests currently running 442 443 accountSynced uint64 // Number of accounts downloaded 444 accountBytes common.StorageSize // Number of account trie bytes persisted to disk 445 bytecodeSynced uint64 // Number of bytecodes downloaded 446 bytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 447 storageSynced uint64 // Number of storage slots downloaded 448 storageBytes common.StorageSize // Number of storage trie bytes persisted to disk 449 450 extProgress *SyncProgress // progress that can be exposed to external caller. 
451 452 // Request tracking during healing phase 453 trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests 454 bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests 455 456 trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running 457 bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running 458 459 trienodeHealRate float64 // Average heal rate for processing trie node data 460 trienodeHealPend uint64 // Number of trie nodes currently pending for processing 461 trienodeHealThrottle float64 // Divisor for throttling the amount of trienode heal data requested 462 trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated 463 464 trienodeHealSynced uint64 // Number of state trie nodes downloaded 465 trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 466 trienodeHealDups uint64 // Number of state trie nodes already processed 467 trienodeHealNops uint64 // Number of state trie nodes not requested 468 bytecodeHealSynced uint64 // Number of bytecodes downloaded 469 bytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 470 bytecodeHealDups uint64 // Number of bytecodes already processed 471 bytecodeHealNops uint64 // Number of bytecodes not requested 472 473 stateWriter ethdb.Batch // Shared batch writer used for persisting raw states 474 accountHealed uint64 // Number of accounts downloaded during the healing stage 475 accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage 476 storageHealed uint64 // Number of storage slots downloaded during the healing stage 477 storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage 478 479 startTime time.Time // Time instance when snapshot sync started 480 logTime time.Time // Time instance when status was last reported 481 482 pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown 483 lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) 484 } 485 486 // NewSyncer creates a new snapshot syncer to download the Ethereum state over the 487 // snap protocol. 488 func NewSyncer(db ethdb.KeyValueStore) *Syncer { 489 return &Syncer{ 490 db: db, 491 492 peers: make(map[string]SyncPeer), 493 peerJoin: new(event.Feed), 494 peerDrop: new(event.Feed), 495 rates: msgrate.NewTrackers(log.New("proto", "snap")), 496 update: make(chan struct{}, 1), 497 498 accountIdlers: make(map[string]struct{}), 499 storageIdlers: make(map[string]struct{}), 500 bytecodeIdlers: make(map[string]struct{}), 501 502 accountReqs: make(map[uint64]*accountRequest), 503 storageReqs: make(map[uint64]*storageRequest), 504 bytecodeReqs: make(map[uint64]*bytecodeRequest), 505 506 trienodeHealIdlers: make(map[string]struct{}), 507 bytecodeHealIdlers: make(map[string]struct{}), 508 509 trienodeHealReqs: make(map[uint64]*trienodeHealRequest), 510 bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), 511 trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk 512 stateWriter: db.NewBatch(), 513 514 extProgress: new(SyncProgress), 515 } 516 } 517 518 // Register injects a new data source into the syncer's peerset. 
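// A minimal usage sketch (hypothetical wiring, for illustration only; in go-ethereum it is the
// downloader that actually drives the snap Syncer through this interface):
//
//	s := NewSyncer(db)          // db is an ethdb.KeyValueStore
//	s.Register(peer)            // as snap-capable peers connect
//	err := s.Sync(root, cancel) // run (or resume) a sync cycle against the given state root
//	s.Unregister(peer.ID())     // as peers disconnect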
519 func (s *Syncer) Register(peer SyncPeer) error { 520 // Make sure the peer is not registered yet 521 id := peer.ID() 522 523 s.lock.Lock() 524 if _, ok := s.peers[id]; ok { 525 log.Error("Snap peer already registered", "id", id) 526 527 s.lock.Unlock() 528 return errors.New("already registered") 529 } 530 s.peers[id] = peer 531 s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip())) 532 533 // Mark the peer as idle, even if no sync is running 534 s.accountIdlers[id] = struct{}{} 535 s.storageIdlers[id] = struct{}{} 536 s.bytecodeIdlers[id] = struct{}{} 537 s.trienodeHealIdlers[id] = struct{}{} 538 s.bytecodeHealIdlers[id] = struct{}{} 539 s.lock.Unlock() 540 541 // Notify any active syncs that a new peer can be assigned data 542 s.peerJoin.Send(id) 543 return nil 544 } 545 546 // Unregister removes a data source from the syncer's peerset. 547 func (s *Syncer) Unregister(id string) error { 548 // Remove all traces of the peer from the registry 549 s.lock.Lock() 550 if _, ok := s.peers[id]; !ok { 551 log.Error("Snap peer not registered", "id", id) 552 553 s.lock.Unlock() 554 return errors.New("not registered") 555 } 556 delete(s.peers, id) 557 s.rates.Untrack(id) 558 559 // Remove status markers, even if no sync is running 560 delete(s.statelessPeers, id) 561 562 delete(s.accountIdlers, id) 563 delete(s.storageIdlers, id) 564 delete(s.bytecodeIdlers, id) 565 delete(s.trienodeHealIdlers, id) 566 delete(s.bytecodeHealIdlers, id) 567 s.lock.Unlock() 568 569 // Notify any active syncs that pending requests need to be reverted 570 s.peerDrop.Send(id) 571 return nil 572 } 573 574 // Sync starts (or resumes a previous) sync cycle to iterate over a state trie 575 // with the given root and reconstruct the nodes based on the snapshot leaves. 576 // Previously downloaded segments will not be redownloaded or fixed, rather any 577 // errors will be healed after the leaves are fully accumulated. 578 func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { 579 // Move the trie root from any previous value, revert stateless markers for 580 // any peers and initialize the syncer if it was not yet run 581 s.lock.Lock() 582 s.root = root 583 s.healer = &healTask{ 584 scheduler: state.NewStateSync(root, s.db, s.onHealState), 585 trieTasks: make(map[string]common.Hash), 586 codeTasks: make(map[common.Hash]struct{}), 587 } 588 s.statelessPeers = make(map[string]struct{}) 589 s.lock.Unlock() 590 591 if s.startTime == (time.Time{}) { 592 s.startTime = time.Now() 593 } 594 // Retrieve the previous sync status from LevelDB and abort if already synced 595 s.loadSyncStatus() 596 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 597 log.Debug("Snapshot sync already completed") 598 return nil 599 } 600 defer func() { // Persist any progress, independent of failure 601 for _, task := range s.tasks { 602 s.forwardAccountTask(task) 603 } 604 s.cleanAccountTasks() 605 s.saveSyncStatus() 606 }() 607 608 log.Debug("Starting snapshot sync cycle", "root", root) 609 610 // Flush out the last committed raw states 611 defer func() { 612 if s.stateWriter.ValueSize() > 0 { 613 s.stateWriter.Write() 614 s.stateWriter.Reset() 615 } 616 }() 617 defer s.report(true) 618 // commit any trie- and bytecode-healing data.
619 defer s.commitHealer(true) 620 621 // Whether sync completed or not, disregard any future packets 622 defer func() { 623 log.Debug("Terminating snapshot sync cycle", "root", root) 624 s.lock.Lock() 625 s.accountReqs = make(map[uint64]*accountRequest) 626 s.storageReqs = make(map[uint64]*storageRequest) 627 s.bytecodeReqs = make(map[uint64]*bytecodeRequest) 628 s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest) 629 s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest) 630 s.lock.Unlock() 631 }() 632 // Keep scheduling sync tasks 633 peerJoin := make(chan string, 16) 634 peerJoinSub := s.peerJoin.Subscribe(peerJoin) 635 defer peerJoinSub.Unsubscribe() 636 637 peerDrop := make(chan string, 16) 638 peerDropSub := s.peerDrop.Subscribe(peerDrop) 639 defer peerDropSub.Unsubscribe() 640 641 // Create a set of unique channels for this sync cycle. We need these to be 642 // ephemeral so a data race doesn't accidentally deliver something stale on 643 // a persistent channel across syncs (yup, this happened) 644 var ( 645 accountReqFails = make(chan *accountRequest) 646 storageReqFails = make(chan *storageRequest) 647 bytecodeReqFails = make(chan *bytecodeRequest) 648 accountResps = make(chan *accountResponse) 649 storageResps = make(chan *storageResponse) 650 bytecodeResps = make(chan *bytecodeResponse) 651 trienodeHealReqFails = make(chan *trienodeHealRequest) 652 bytecodeHealReqFails = make(chan *bytecodeHealRequest) 653 trienodeHealResps = make(chan *trienodeHealResponse) 654 bytecodeHealResps = make(chan *bytecodeHealResponse) 655 ) 656 for { 657 // Remove all completed tasks and terminate sync if everything's done 658 s.cleanStorageTasks() 659 s.cleanAccountTasks() 660 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 661 return nil 662 } 663 // Assign all the data retrieval tasks to any free peers 664 s.assignAccountTasks(accountResps, accountReqFails, cancel) 665 s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel) 666 s.assignStorageTasks(storageResps, storageReqFails, cancel) 667 668 if len(s.tasks) == 0 { 669 // Sync phase done, run heal phase 670 s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel) 671 s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel) 672 } 673 // Update sync progress 674 s.lock.Lock() 675 s.extProgress = &SyncProgress{ 676 AccountSynced: s.accountSynced, 677 AccountBytes: s.accountBytes, 678 BytecodeSynced: s.bytecodeSynced, 679 BytecodeBytes: s.bytecodeBytes, 680 StorageSynced: s.storageSynced, 681 StorageBytes: s.storageBytes, 682 TrienodeHealSynced: s.trienodeHealSynced, 683 TrienodeHealBytes: s.trienodeHealBytes, 684 BytecodeHealSynced: s.bytecodeHealSynced, 685 BytecodeHealBytes: s.bytecodeHealBytes, 686 } 687 s.lock.Unlock() 688 // Wait for something to happen 689 select { 690 case <-s.update: 691 // Something happened (new peer, delivery, timeout), recheck tasks 692 case <-peerJoin: 693 // A new peer joined, try to schedule it new tasks 694 case id := <-peerDrop: 695 s.revertRequests(id) 696 case <-cancel: 697 return ErrCancelled 698 699 case req := <-accountReqFails: 700 s.revertAccountRequest(req) 701 case req := <-bytecodeReqFails: 702 s.revertBytecodeRequest(req) 703 case req := <-storageReqFails: 704 s.revertStorageRequest(req) 705 case req := <-trienodeHealReqFails: 706 s.revertTrienodeHealRequest(req) 707 case req := <-bytecodeHealReqFails: 708 s.revertBytecodeHealRequest(req) 709 710 case res := <-accountResps: 711 s.processAccountResponse(res) 712 case res := 
<-bytecodeResps: 713 s.processBytecodeResponse(res) 714 case res := <-storageResps: 715 s.processStorageResponse(res) 716 case res := <-trienodeHealResps: 717 s.processTrienodeHealResponse(res) 718 case res := <-bytecodeHealResps: 719 s.processBytecodeHealResponse(res) 720 } 721 // Report stats if something meaningful happened 722 s.report(false) 723 } 724 } 725 726 // loadSyncStatus retrieves a previously aborted sync status from the database, 727 // or generates a fresh one if none is available. 728 func (s *Syncer) loadSyncStatus() { 729 var progress SyncProgress 730 731 if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { 732 if err := json.Unmarshal(status, &progress); err != nil { 733 log.Error("Failed to decode snap sync status", "err", err) 734 } else { 735 for _, task := range progress.Tasks { 736 log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) 737 } 738 s.tasks = progress.Tasks 739 for _, task := range s.tasks { 740 task.genBatch = ethdb.HookedBatch{ 741 Batch: s.db.NewBatch(), 742 OnPut: func(key []byte, value []byte) { 743 s.accountBytes += common.StorageSize(len(key) + len(value)) 744 }, 745 } 746 task.genTrie = trie.NewStackTrie(task.genBatch) 747 748 for accountHash, subtasks := range task.SubTasks { 749 for _, subtask := range subtasks { 750 subtask.genBatch = ethdb.HookedBatch{ 751 Batch: s.db.NewBatch(), 752 OnPut: func(key []byte, value []byte) { 753 s.storageBytes += common.StorageSize(len(key) + len(value)) 754 }, 755 } 756 subtask.genTrie = trie.NewStackTrieWithOwner(subtask.genBatch, accountHash) 757 } 758 } 759 } 760 s.lock.Lock() 761 defer s.lock.Unlock() 762 763 s.snapped = len(s.tasks) == 0 764 765 s.accountSynced = progress.AccountSynced 766 s.accountBytes = progress.AccountBytes 767 s.bytecodeSynced = progress.BytecodeSynced 768 s.bytecodeBytes = progress.BytecodeBytes 769 s.storageSynced = progress.StorageSynced 770 s.storageBytes = progress.StorageBytes 771 772 s.trienodeHealSynced = progress.TrienodeHealSynced 773 s.trienodeHealBytes = progress.TrienodeHealBytes 774 s.bytecodeHealSynced = progress.BytecodeHealSynced 775 s.bytecodeHealBytes = progress.BytecodeHealBytes 776 return 777 } 778 } 779 // Either we've failed to decode the previous state, or there was none. 780 // Start a fresh sync by chunking up the account range and scheduling 781 // them for retrieval. 
782 s.tasks = nil 783 s.accountSynced, s.accountBytes = 0, 0 784 s.bytecodeSynced, s.bytecodeBytes = 0, 0 785 s.storageSynced, s.storageBytes = 0, 0 786 s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 787 s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 788 789 var next common.Hash 790 step := new(big.Int).Sub( 791 new(big.Int).Div( 792 new(big.Int).Exp(common.Big2, common.Big256, nil), 793 big.NewInt(int64(accountConcurrency)), 794 ), common.Big1, 795 ) 796 for i := 0; i < accountConcurrency; i++ { 797 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 798 if i == accountConcurrency-1 { 799 // Make sure we don't overflow if the step is not a proper divisor 800 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 801 } 802 batch := ethdb.HookedBatch{ 803 Batch: s.db.NewBatch(), 804 OnPut: func(key []byte, value []byte) { 805 s.accountBytes += common.StorageSize(len(key) + len(value)) 806 }, 807 } 808 s.tasks = append(s.tasks, &accountTask{ 809 Next: next, 810 Last: last, 811 SubTasks: make(map[common.Hash][]*storageTask), 812 genBatch: batch, 813 genTrie: trie.NewStackTrie(batch), 814 }) 815 log.Debug("Created account sync task", "from", next, "last", last) 816 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 817 } 818 } 819 820 // saveSyncStatus marshals the remaining sync tasks into leveldb. 821 func (s *Syncer) saveSyncStatus() { 822 // Serialize any partial progress to disk before spinning down 823 for _, task := range s.tasks { 824 if err := task.genBatch.Write(); err != nil { 825 log.Error("Failed to persist account slots", "err", err) 826 } 827 for _, subtasks := range task.SubTasks { 828 for _, subtask := range subtasks { 829 if err := subtask.genBatch.Write(); err != nil { 830 log.Error("Failed to persist storage slots", "err", err) 831 } 832 } 833 } 834 } 835 // Store the actual progress markers 836 progress := &SyncProgress{ 837 Tasks: s.tasks, 838 AccountSynced: s.accountSynced, 839 AccountBytes: s.accountBytes, 840 BytecodeSynced: s.bytecodeSynced, 841 BytecodeBytes: s.bytecodeBytes, 842 StorageSynced: s.storageSynced, 843 StorageBytes: s.storageBytes, 844 TrienodeHealSynced: s.trienodeHealSynced, 845 TrienodeHealBytes: s.trienodeHealBytes, 846 BytecodeHealSynced: s.bytecodeHealSynced, 847 BytecodeHealBytes: s.bytecodeHealBytes, 848 } 849 status, err := json.Marshal(progress) 850 if err != nil { 851 panic(err) // This can only fail during implementation 852 } 853 rawdb.WriteSnapshotSyncStatus(s.db, status) 854 } 855 856 // Progress returns the snap sync status statistics. 857 func (s *Syncer) Progress() (*SyncProgress, *SyncPending) { 858 s.lock.Lock() 859 defer s.lock.Unlock() 860 pending := new(SyncPending) 861 if s.healer != nil { 862 pending.TrienodeHeal = uint64(len(s.healer.trieTasks)) 863 pending.BytecodeHeal = uint64(len(s.healer.codeTasks)) 864 } 865 return s.extProgress, pending 866 } 867 868 // cleanAccountTasks removes account range retrieval tasks that have already been 869 // completed. 870 func (s *Syncer) cleanAccountTasks() { 871 // If the sync was already done before, don't even bother 872 if len(s.tasks) == 0 { 873 return 874 } 875 // Sync wasn't finished previously, check for any task that can be finalized 876 for i := 0; i < len(s.tasks); i++ { 877 if s.tasks[i].done { 878 s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 
879 i-- 880 } 881 } 882 // If everything was just finalized just, generate the account trie and start heal 883 if len(s.tasks) == 0 { 884 s.lock.Lock() 885 s.snapped = true 886 s.lock.Unlock() 887 888 // Push the final sync report 889 s.reportSyncProgress(true) 890 } 891 } 892 893 // cleanStorageTasks iterates over all the account tasks and storage sub-tasks 894 // within, cleaning any that have been completed. 895 func (s *Syncer) cleanStorageTasks() { 896 for _, task := range s.tasks { 897 for account, subtasks := range task.SubTasks { 898 // Remove storage range retrieval tasks that completed 899 for j := 0; j < len(subtasks); j++ { 900 if subtasks[j].done { 901 subtasks = append(subtasks[:j], subtasks[j+1:]...) 902 j-- 903 } 904 } 905 if len(subtasks) > 0 { 906 task.SubTasks[account] = subtasks 907 continue 908 } 909 // If all storage chunks are done, mark the account as done too 910 for j, hash := range task.res.hashes { 911 if hash == account { 912 task.needState[j] = false 913 } 914 } 915 delete(task.SubTasks, account) 916 task.pend-- 917 918 // If this was the last pending task, forward the account task 919 if task.pend == 0 { 920 s.forwardAccountTask(task) 921 } 922 } 923 } 924 } 925 926 // assignAccountTasks attempts to match idle peers to pending account range 927 // retrievals. 928 func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) { 929 s.lock.Lock() 930 defer s.lock.Unlock() 931 932 // Sort the peers by download capacity to use faster ones if many available 933 idlers := &capacitySort{ 934 ids: make([]string, 0, len(s.accountIdlers)), 935 caps: make([]int, 0, len(s.accountIdlers)), 936 } 937 targetTTL := s.rates.TargetTimeout() 938 for id := range s.accountIdlers { 939 if _, ok := s.statelessPeers[id]; ok { 940 continue 941 } 942 idlers.ids = append(idlers.ids, id) 943 idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL)) 944 } 945 if len(idlers.ids) == 0 { 946 return 947 } 948 sort.Sort(sort.Reverse(idlers)) 949 950 // Iterate over all the tasks and try to find a pending one 951 for _, task := range s.tasks { 952 // Skip any tasks already filling 953 if task.req != nil || task.res != nil { 954 continue 955 } 956 // Task pending retrieval, try to find an idle peer. If no such peer 957 // exists, we probably assigned tasks for all (or they are stateless). 958 // Abort the entire assignment mechanism. 
959 if len(idlers.ids) == 0 { 960 return 961 } 962 var ( 963 idle = idlers.ids[0] 964 peer = s.peers[idle] 965 cap = idlers.caps[0] 966 ) 967 idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] 968 969 // Matched a pending task to an idle peer, allocate a unique request id 970 var reqid uint64 971 for { 972 reqid = uint64(rand.Int63()) 973 if reqid == 0 { 974 continue 975 } 976 if _, ok := s.accountReqs[reqid]; ok { 977 continue 978 } 979 break 980 } 981 // Generate the network query and send it to the peer 982 req := &accountRequest{ 983 peer: idle, 984 id: reqid, 985 time: time.Now(), 986 deliver: success, 987 revert: fail, 988 cancel: cancel, 989 stale: make(chan struct{}), 990 origin: task.Next, 991 limit: task.Last, 992 task: task, 993 } 994 req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { 995 peer.Log().Debug("Account range request timed out", "reqid", reqid) 996 s.rates.Update(idle, AccountRangeMsg, 0, 0) 997 s.scheduleRevertAccountRequest(req) 998 }) 999 s.accountReqs[reqid] = req 1000 delete(s.accountIdlers, idle) 1001 1002 s.pend.Add(1) 1003 go func(root common.Hash) { 1004 defer s.pend.Done() 1005 1006 // Attempt to send the remote request and revert if it fails 1007 if cap > maxRequestSize { 1008 cap = maxRequestSize 1009 } 1010 if cap < minRequestSize { // Don't bother with peers below a bare minimum performance 1011 cap = minRequestSize 1012 } 1013 if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil { 1014 peer.Log().Debug("Failed to request account range", "err", err) 1015 s.scheduleRevertAccountRequest(req) 1016 } 1017 }(s.root) 1018 1019 // Inject the request into the task to block further assignments 1020 task.req = req 1021 } 1022 } 1023 1024 // assignBytecodeTasks attempts to match idle peers to pending code retrievals. 1025 func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) { 1026 s.lock.Lock() 1027 defer s.lock.Unlock() 1028 1029 // Sort the peers by download capacity to use faster ones if many available 1030 idlers := &capacitySort{ 1031 ids: make([]string, 0, len(s.bytecodeIdlers)), 1032 caps: make([]int, 0, len(s.bytecodeIdlers)), 1033 } 1034 targetTTL := s.rates.TargetTimeout() 1035 for id := range s.bytecodeIdlers { 1036 if _, ok := s.statelessPeers[id]; ok { 1037 continue 1038 } 1039 idlers.ids = append(idlers.ids, id) 1040 idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL)) 1041 } 1042 if len(idlers.ids) == 0 { 1043 return 1044 } 1045 sort.Sort(sort.Reverse(idlers)) 1046 1047 // Iterate over all the tasks and try to find a pending one 1048 for _, task := range s.tasks { 1049 // Skip any tasks not in the bytecode retrieval phase 1050 if task.res == nil { 1051 continue 1052 } 1053 // Skip tasks that are already retrieving (or done with) all codes 1054 if len(task.codeTasks) == 0 { 1055 continue 1056 } 1057 // Task pending retrieval, try to find an idle peer. If no such peer 1058 // exists, we probably assigned tasks for all (or they are stateless). 1059 // Abort the entire assignment mechanism. 
1060 if len(idlers.ids) == 0 { 1061 return 1062 } 1063 var ( 1064 idle = idlers.ids[0] 1065 peer = s.peers[idle] 1066 cap = idlers.caps[0] 1067 ) 1068 idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] 1069 1070 // Matched a pending task to an idle peer, allocate a unique request id 1071 var reqid uint64 1072 for { 1073 reqid = uint64(rand.Int63()) 1074 if reqid == 0 { 1075 continue 1076 } 1077 if _, ok := s.bytecodeReqs[reqid]; ok { 1078 continue 1079 } 1080 break 1081 } 1082 // Generate the network query and send it to the peer 1083 if cap > maxCodeRequestCount { 1084 cap = maxCodeRequestCount 1085 } 1086 hashes := make([]common.Hash, 0, cap) 1087 for hash := range task.codeTasks { 1088 delete(task.codeTasks, hash) 1089 hashes = append(hashes, hash) 1090 if len(hashes) >= cap { 1091 break 1092 } 1093 } 1094 req := &bytecodeRequest{ 1095 peer: idle, 1096 id: reqid, 1097 time: time.Now(), 1098 deliver: success, 1099 revert: fail, 1100 cancel: cancel, 1101 stale: make(chan struct{}), 1102 hashes: hashes, 1103 task: task, 1104 } 1105 req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { 1106 peer.Log().Debug("Bytecode request timed out", "reqid", reqid) 1107 s.rates.Update(idle, ByteCodesMsg, 0, 0) 1108 s.scheduleRevertBytecodeRequest(req) 1109 }) 1110 s.bytecodeReqs[reqid] = req 1111 delete(s.bytecodeIdlers, idle) 1112 1113 s.pend.Add(1) 1114 go func() { 1115 defer s.pend.Done() 1116 1117 // Attempt to send the remote request and revert if it fails 1118 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 1119 log.Debug("Failed to request bytecodes", "err", err) 1120 s.scheduleRevertBytecodeRequest(req) 1121 } 1122 }() 1123 } 1124 } 1125 1126 // assignStorageTasks attempts to match idle peers to pending storage range 1127 // retrievals. 1128 func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) { 1129 s.lock.Lock() 1130 defer s.lock.Unlock() 1131 1132 // Sort the peers by download capacity to use faster ones if many available 1133 idlers := &capacitySort{ 1134 ids: make([]string, 0, len(s.storageIdlers)), 1135 caps: make([]int, 0, len(s.storageIdlers)), 1136 } 1137 targetTTL := s.rates.TargetTimeout() 1138 for id := range s.storageIdlers { 1139 if _, ok := s.statelessPeers[id]; ok { 1140 continue 1141 } 1142 idlers.ids = append(idlers.ids, id) 1143 idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL)) 1144 } 1145 if len(idlers.ids) == 0 { 1146 return 1147 } 1148 sort.Sort(sort.Reverse(idlers)) 1149 1150 // Iterate over all the tasks and try to find a pending one 1151 for _, task := range s.tasks { 1152 // Skip any tasks not in the storage retrieval phase 1153 if task.res == nil { 1154 continue 1155 } 1156 // Skip tasks that are already retrieving (or done with) all small states 1157 if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { 1158 continue 1159 } 1160 // Task pending retrieval, try to find an idle peer. If no such peer 1161 // exists, we probably assigned tasks for all (or they are stateless). 1162 // Abort the entire assignment mechanism. 
1163 if len(idlers.ids) == 0 { 1164 return 1165 } 1166 var ( 1167 idle = idlers.ids[0] 1168 peer = s.peers[idle] 1169 cap = idlers.caps[0] 1170 ) 1171 idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] 1172 1173 // Matched a pending task to an idle peer, allocate a unique request id 1174 var reqid uint64 1175 for { 1176 reqid = uint64(rand.Int63()) 1177 if reqid == 0 { 1178 continue 1179 } 1180 if _, ok := s.storageReqs[reqid]; ok { 1181 continue 1182 } 1183 break 1184 } 1185 // Generate the network query and send it to the peer. If there are 1186 // large contract tasks pending, complete those before diving into 1187 // even more new contracts. 1188 if cap > maxRequestSize { 1189 cap = maxRequestSize 1190 } 1191 if cap < minRequestSize { // Don't bother with peers below a bare minimum performance 1192 cap = minRequestSize 1193 } 1194 storageSets := cap / 1024 1195 1196 var ( 1197 accounts = make([]common.Hash, 0, storageSets) 1198 roots = make([]common.Hash, 0, storageSets) 1199 subtask *storageTask 1200 ) 1201 for account, subtasks := range task.SubTasks { 1202 for _, st := range subtasks { 1203 // Skip any subtasks already filling 1204 if st.req != nil { 1205 continue 1206 } 1207 // Found an incomplete storage chunk, schedule it 1208 accounts = append(accounts, account) 1209 roots = append(roots, st.root) 1210 subtask = st 1211 break // Large contract chunks are downloaded individually 1212 } 1213 if subtask != nil { 1214 break // Large contract chunks are downloaded individually 1215 } 1216 } 1217 if subtask == nil { 1218 // No large contract required retrieval, but small ones available 1219 for account, root := range task.stateTasks { 1220 delete(task.stateTasks, account) 1221 1222 accounts = append(accounts, account) 1223 roots = append(roots, root) 1224 1225 if len(accounts) >= storageSets { 1226 break 1227 } 1228 } 1229 } 1230 // If nothing was found, it means this task is actually already fully 1231 // retrieving, but large contracts are hard to detect. Skip to the next. 1232 if len(accounts) == 0 { 1233 continue 1234 } 1235 req := &storageRequest{ 1236 peer: idle, 1237 id: reqid, 1238 time: time.Now(), 1239 deliver: success, 1240 revert: fail, 1241 cancel: cancel, 1242 stale: make(chan struct{}), 1243 accounts: accounts, 1244 roots: roots, 1245 mainTask: task, 1246 subTask: subtask, 1247 } 1248 if subtask != nil { 1249 req.origin = subtask.Next 1250 req.limit = subtask.Last 1251 } 1252 req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { 1253 peer.Log().Debug("Storage request timed out", "reqid", reqid) 1254 s.rates.Update(idle, StorageRangesMsg, 0, 0) 1255 s.scheduleRevertStorageRequest(req) 1256 }) 1257 s.storageReqs[reqid] = req 1258 delete(s.storageIdlers, idle) 1259 1260 s.pend.Add(1) 1261 go func(root common.Hash) { 1262 defer s.pend.Done() 1263 1264 // Attempt to send the remote request and revert if it fails 1265 var origin, limit []byte 1266 if subtask != nil { 1267 origin, limit = req.origin[:], req.limit[:] 1268 } 1269 if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil { 1270 log.Debug("Failed to request storage", "err", err) 1271 s.scheduleRevertStorageRequest(req) 1272 } 1273 }(s.root) 1274 1275 // Inject the request into the subtask to block further assignments 1276 if subtask != nil { 1277 subtask.req = req 1278 } 1279 } 1280 } 1281 1282 // assignTrienodeHealTasks attempts to match idle peers to trie node requests to 1283 // heal any trie errors caused by the snap sync's chunked retrieval model. 
1284 func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) { 1285 s.lock.Lock() 1286 defer s.lock.Unlock() 1287 1288 // Sort the peers by download capacity to use faster ones if many available 1289 idlers := &capacitySort{ 1290 ids: make([]string, 0, len(s.trienodeHealIdlers)), 1291 caps: make([]int, 0, len(s.trienodeHealIdlers)), 1292 } 1293 targetTTL := s.rates.TargetTimeout() 1294 for id := range s.trienodeHealIdlers { 1295 if _, ok := s.statelessPeers[id]; ok { 1296 continue 1297 } 1298 idlers.ids = append(idlers.ids, id) 1299 idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL)) 1300 } 1301 if len(idlers.ids) == 0 { 1302 return 1303 } 1304 sort.Sort(sort.Reverse(idlers)) 1305 1306 // Iterate over pending tasks and try to find a peer to retrieve with 1307 for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1308 // If there are not enough trie tasks queued to fully assign, fill the 1309 // queue from the state sync scheduler. The trie synced schedules these 1310 // together with bytecodes, so we need to queue them combined. 1311 var ( 1312 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1313 want = maxTrieRequestCount + maxCodeRequestCount 1314 ) 1315 if have < want { 1316 paths, hashes, codes := s.healer.scheduler.Missing(want - have) 1317 for i, path := range paths { 1318 s.healer.trieTasks[path] = hashes[i] 1319 } 1320 for _, hash := range codes { 1321 s.healer.codeTasks[hash] = struct{}{} 1322 } 1323 } 1324 // If all the heal tasks are bytecodes or already downloading, bail 1325 if len(s.healer.trieTasks) == 0 { 1326 return 1327 } 1328 // Task pending retrieval, try to find an idle peer. If no such peer 1329 // exists, we probably assigned tasks for all (or they are stateless). 1330 // Abort the entire assignment mechanism. 
1331 if len(idlers.ids) == 0 { 1332 return 1333 } 1334 var ( 1335 idle = idlers.ids[0] 1336 peer = s.peers[idle] 1337 cap = idlers.caps[0] 1338 ) 1339 idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] 1340 1341 // Matched a pending task to an idle peer, allocate a unique request id 1342 var reqid uint64 1343 for { 1344 reqid = uint64(rand.Int63()) 1345 if reqid == 0 { 1346 continue 1347 } 1348 if _, ok := s.trienodeHealReqs[reqid]; ok { 1349 continue 1350 } 1351 break 1352 } 1353 // Generate the network query and send it to the peer 1354 if cap > maxTrieRequestCount { 1355 cap = maxTrieRequestCount 1356 } 1357 cap = int(float64(cap) / s.trienodeHealThrottle) 1358 if cap <= 0 { 1359 cap = 1 1360 } 1361 var ( 1362 hashes = make([]common.Hash, 0, cap) 1363 paths = make([]string, 0, cap) 1364 pathsets = make([]TrieNodePathSet, 0, cap) 1365 ) 1366 for path, hash := range s.healer.trieTasks { 1367 delete(s.healer.trieTasks, path) 1368 1369 paths = append(paths, path) 1370 hashes = append(hashes, hash) 1371 if len(paths) >= cap { 1372 break 1373 } 1374 } 1375 // Group requests by account hash 1376 paths, hashes, _, pathsets = sortByAccountPath(paths, hashes) 1377 req := &trienodeHealRequest{ 1378 peer: idle, 1379 id: reqid, 1380 time: time.Now(), 1381 deliver: success, 1382 revert: fail, 1383 cancel: cancel, 1384 stale: make(chan struct{}), 1385 paths: paths, 1386 hashes: hashes, 1387 task: s.healer, 1388 } 1389 req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { 1390 peer.Log().Debug("Trienode heal request timed out", "reqid", reqid) 1391 s.rates.Update(idle, TrieNodesMsg, 0, 0) 1392 s.scheduleRevertTrienodeHealRequest(req) 1393 }) 1394 s.trienodeHealReqs[reqid] = req 1395 delete(s.trienodeHealIdlers, idle) 1396 1397 s.pend.Add(1) 1398 go func(root common.Hash) { 1399 defer s.pend.Done() 1400 1401 // Attempt to send the remote request and revert if it fails 1402 if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil { 1403 log.Debug("Failed to request trienode healers", "err", err) 1404 s.scheduleRevertTrienodeHealRequest(req) 1405 } 1406 }(s.root) 1407 } 1408 } 1409 1410 // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to 1411 // heal any trie errors caused by the snap sync's chunked retrieval model. 1412 func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) { 1413 s.lock.Lock() 1414 defer s.lock.Unlock() 1415 1416 // Sort the peers by download capacity to use faster ones if many available 1417 idlers := &capacitySort{ 1418 ids: make([]string, 0, len(s.bytecodeHealIdlers)), 1419 caps: make([]int, 0, len(s.bytecodeHealIdlers)), 1420 } 1421 targetTTL := s.rates.TargetTimeout() 1422 for id := range s.bytecodeHealIdlers { 1423 if _, ok := s.statelessPeers[id]; ok { 1424 continue 1425 } 1426 idlers.ids = append(idlers.ids, id) 1427 idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL)) 1428 } 1429 if len(idlers.ids) == 0 { 1430 return 1431 } 1432 sort.Sort(sort.Reverse(idlers)) 1433 1434 // Iterate over pending tasks and try to find a peer to retrieve with 1435 for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1436 // If there are not enough trie tasks queued to fully assign, fill the 1437 // queue from the state sync scheduler. The trie synced schedules these 1438 // together with trie nodes, so we need to queue them combined. 
1439 var ( 1440 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1441 want = maxTrieRequestCount + maxCodeRequestCount 1442 ) 1443 if have < want { 1444 paths, hashes, codes := s.healer.scheduler.Missing(want - have) 1445 for i, path := range paths { 1446 s.healer.trieTasks[path] = hashes[i] 1447 } 1448 for _, hash := range codes { 1449 s.healer.codeTasks[hash] = struct{}{} 1450 } 1451 } 1452 // If all the heal tasks are trienodes or already downloading, bail 1453 if len(s.healer.codeTasks) == 0 { 1454 return 1455 } 1456 // Task pending retrieval, try to find an idle peer. If no such peer 1457 // exists, we probably assigned tasks for all (or they are stateless). 1458 // Abort the entire assignment mechanism. 1459 if len(idlers.ids) == 0 { 1460 return 1461 } 1462 var ( 1463 idle = idlers.ids[0] 1464 peer = s.peers[idle] 1465 cap = idlers.caps[0] 1466 ) 1467 idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:] 1468 1469 // Matched a pending task to an idle peer, allocate a unique request id 1470 var reqid uint64 1471 for { 1472 reqid = uint64(rand.Int63()) 1473 if reqid == 0 { 1474 continue 1475 } 1476 if _, ok := s.bytecodeHealReqs[reqid]; ok { 1477 continue 1478 } 1479 break 1480 } 1481 // Generate the network query and send it to the peer 1482 if cap > maxCodeRequestCount { 1483 cap = maxCodeRequestCount 1484 } 1485 hashes := make([]common.Hash, 0, cap) 1486 for hash := range s.healer.codeTasks { 1487 delete(s.healer.codeTasks, hash) 1488 1489 hashes = append(hashes, hash) 1490 if len(hashes) >= cap { 1491 break 1492 } 1493 } 1494 req := &bytecodeHealRequest{ 1495 peer: idle, 1496 id: reqid, 1497 time: time.Now(), 1498 deliver: success, 1499 revert: fail, 1500 cancel: cancel, 1501 stale: make(chan struct{}), 1502 hashes: hashes, 1503 task: s.healer, 1504 } 1505 req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() { 1506 peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid) 1507 s.rates.Update(idle, ByteCodesMsg, 0, 0) 1508 s.scheduleRevertBytecodeHealRequest(req) 1509 }) 1510 s.bytecodeHealReqs[reqid] = req 1511 delete(s.bytecodeHealIdlers, idle) 1512 1513 s.pend.Add(1) 1514 go func() { 1515 defer s.pend.Done() 1516 1517 // Attempt to send the remote request and revert if it fails 1518 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 1519 log.Debug("Failed to request bytecode healers", "err", err) 1520 s.scheduleRevertBytecodeHealRequest(req) 1521 } 1522 }() 1523 } 1524 } 1525 1526 // revertRequests locates all the currently pending requests from a particular 1527 // peer and reverts them, rescheduling for others to fulfill. 
1528 func (s *Syncer) revertRequests(peer string) { 1529 // Gather the requests first, revertals need the lock too 1530 s.lock.Lock() 1531 var accountReqs []*accountRequest 1532 for _, req := range s.accountReqs { 1533 if req.peer == peer { 1534 accountReqs = append(accountReqs, req) 1535 } 1536 } 1537 var bytecodeReqs []*bytecodeRequest 1538 for _, req := range s.bytecodeReqs { 1539 if req.peer == peer { 1540 bytecodeReqs = append(bytecodeReqs, req) 1541 } 1542 } 1543 var storageReqs []*storageRequest 1544 for _, req := range s.storageReqs { 1545 if req.peer == peer { 1546 storageReqs = append(storageReqs, req) 1547 } 1548 } 1549 var trienodeHealReqs []*trienodeHealRequest 1550 for _, req := range s.trienodeHealReqs { 1551 if req.peer == peer { 1552 trienodeHealReqs = append(trienodeHealReqs, req) 1553 } 1554 } 1555 var bytecodeHealReqs []*bytecodeHealRequest 1556 for _, req := range s.bytecodeHealReqs { 1557 if req.peer == peer { 1558 bytecodeHealReqs = append(bytecodeHealReqs, req) 1559 } 1560 } 1561 s.lock.Unlock() 1562 1563 // Revert all the requests matching the peer 1564 for _, req := range accountReqs { 1565 s.revertAccountRequest(req) 1566 } 1567 for _, req := range bytecodeReqs { 1568 s.revertBytecodeRequest(req) 1569 } 1570 for _, req := range storageReqs { 1571 s.revertStorageRequest(req) 1572 } 1573 for _, req := range trienodeHealReqs { 1574 s.revertTrienodeHealRequest(req) 1575 } 1576 for _, req := range bytecodeHealReqs { 1577 s.revertBytecodeHealRequest(req) 1578 } 1579 } 1580 1581 // scheduleRevertAccountRequest asks the event loop to clean up an account range 1582 // request and return all failed retrieval tasks to the scheduler for reassignment. 1583 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { 1584 select { 1585 case req.revert <- req: 1586 // Sync event loop notified 1587 case <-req.cancel: 1588 // Sync cycle got cancelled 1589 case <-req.stale: 1590 // Request already reverted 1591 } 1592 } 1593 1594 // revertAccountRequest cleans up an account range request and returns all failed 1595 // retrieval tasks to the scheduler for reassignment. 1596 // 1597 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1598 // On peer threads, use scheduleRevertAccountRequest. 1599 func (s *Syncer) revertAccountRequest(req *accountRequest) { 1600 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) 1601 select { 1602 case <-req.stale: 1603 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id) 1604 return 1605 default: 1606 } 1607 close(req.stale) 1608 1609 // Remove the request from the tracked set 1610 s.lock.Lock() 1611 delete(s.accountReqs, req.id) 1612 s.lock.Unlock() 1613 1614 // If there's a timeout timer still running, abort it and mark the account 1615 // task as not-pending, ready for rescheduling 1616 req.timeout.Stop() 1617 if req.task.req == req { 1618 req.task.req = nil 1619 } 1620 } 1621 1622 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request 1623 // and return all failed retrieval tasks to the scheduler for reassignment. 
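// Like the other scheduleRevert* helpers, this one only hands the request over
// to the sync event loop via the revert channel; the select also listens on the
// cancel and stale channels so that a peer thread can never block indefinitely
// if the sync cycle is being torn down or the request was already reverted
// elsewhere.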
1624 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1625 select { 1626 case req.revert <- req: 1627 // Sync event loop notified 1628 case <-req.cancel: 1629 // Sync cycle got cancelled 1630 case <-req.stale: 1631 // Request already reverted 1632 } 1633 } 1634 1635 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1636 // retrieval tasks to the scheduler for reassignment. 1637 // 1638 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1639 // On peer threads, use scheduleRevertBytecodeRequest. 1640 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1641 log.Debug("Reverting bytecode request", "peer", req.peer) 1642 select { 1643 case <-req.stale: 1644 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1645 return 1646 default: 1647 } 1648 close(req.stale) 1649 1650 // Remove the request from the tracked set 1651 s.lock.Lock() 1652 delete(s.bytecodeReqs, req.id) 1653 s.lock.Unlock() 1654 1655 // If there's a timeout timer still running, abort it and mark the code 1656 // retrievals as not-pending, ready for rescheduling 1657 req.timeout.Stop() 1658 for _, hash := range req.hashes { 1659 req.task.codeTasks[hash] = struct{}{} 1660 } 1661 } 1662 1663 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1664 // request and return all failed retrieval tasks to the scheduler for reassignment. 1665 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1666 select { 1667 case req.revert <- req: 1668 // Sync event loop notified 1669 case <-req.cancel: 1670 // Sync cycle got cancelled 1671 case <-req.stale: 1672 // Request already reverted 1673 } 1674 } 1675 1676 // revertStorageRequest cleans up a storage range request and returns all failed 1677 // retrieval tasks to the scheduler for reassignment. 1678 // 1679 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1680 // On peer threads, use scheduleRevertStorageRequest. 1681 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1682 log.Debug("Reverting storage request", "peer", req.peer) 1683 select { 1684 case <-req.stale: 1685 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1686 return 1687 default: 1688 } 1689 close(req.stale) 1690 1691 // Remove the request from the tracked set 1692 s.lock.Lock() 1693 delete(s.storageReqs, req.id) 1694 s.lock.Unlock() 1695 1696 // If there's a timeout timer still running, abort it and mark the storage 1697 // task as not-pending, ready for rescheduling 1698 req.timeout.Stop() 1699 if req.subTask != nil { 1700 req.subTask.req = nil 1701 } else { 1702 for i, account := range req.accounts { 1703 req.mainTask.stateTasks[account] = req.roots[i] 1704 } 1705 } 1706 } 1707 1708 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1709 // request and return all failed retrieval tasks to the scheduler for reassignment. 1710 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1711 select { 1712 case req.revert <- req: 1713 // Sync event loop notified 1714 case <-req.cancel: 1715 // Sync cycle got cancelled 1716 case <-req.stale: 1717 // Request already reverted 1718 } 1719 } 1720 1721 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1722 // failed retrieval tasks to the scheduler for reassignment. 1723 // 1724 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 
1725 // On peer threads, use scheduleRevertTrienodeHealRequest. 1726 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { 1727 log.Debug("Reverting trienode heal request", "peer", req.peer) 1728 select { 1729 case <-req.stale: 1730 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) 1731 return 1732 default: 1733 } 1734 close(req.stale) 1735 1736 // Remove the request from the tracked set 1737 s.lock.Lock() 1738 delete(s.trienodeHealReqs, req.id) 1739 s.lock.Unlock() 1740 1741 // If there's a timeout timer still running, abort it and mark the trie node 1742 // retrievals as not-pending, ready for rescheduling 1743 req.timeout.Stop() 1744 for i, path := range req.paths { 1745 req.task.trieTasks[path] = req.hashes[i] 1746 } 1747 } 1748 1749 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal 1750 // request and return all failed retrieval tasks to the scheduler for reassignment. 1751 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { 1752 select { 1753 case req.revert <- req: 1754 // Sync event loop notified 1755 case <-req.cancel: 1756 // Sync cycle got cancelled 1757 case <-req.stale: 1758 // Request already reverted 1759 } 1760 } 1761 1762 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all 1763 // failed retrieval tasks to the scheduler for reassignment. 1764 // 1765 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1766 // On peer threads, use scheduleRevertBytecodeHealRequest. 1767 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { 1768 log.Debug("Reverting bytecode heal request", "peer", req.peer) 1769 select { 1770 case <-req.stale: 1771 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) 1772 return 1773 default: 1774 } 1775 close(req.stale) 1776 1777 // Remove the request from the tracked set 1778 s.lock.Lock() 1779 delete(s.bytecodeHealReqs, req.id) 1780 s.lock.Unlock() 1781 1782 // If there's a timeout timer still running, abort it and mark the code 1783 // retrievals as not-pending, ready for rescheduling 1784 req.timeout.Stop() 1785 for _, hash := range req.hashes { 1786 req.task.codeTasks[hash] = struct{}{} 1787 } 1788 } 1789 1790 // processAccountResponse integrates an already validated account range response 1791 // into the account tasks. 1792 func (s *Syncer) processAccountResponse(res *accountResponse) { 1793 // Switch the task from pending to filling 1794 res.task.req = nil 1795 res.task.res = res 1796 1797 // Ensure that the response doesn't overflow into the subsequent task 1798 last := res.task.Last.Big() 1799 for i, hash := range res.hashes { 1800 // Mark the range complete if the last is already included. 1801 // Keep iteration to delete the extra states if exists. 1802 cmp := hash.Big().Cmp(last) 1803 if cmp == 0 { 1804 res.cont = false 1805 continue 1806 } 1807 if cmp > 0 { 1808 // Chunk overflown, cut off excess 1809 res.hashes = res.hashes[:i] 1810 res.accounts = res.accounts[:i] 1811 res.cont = false // Mark range completed 1812 break 1813 } 1814 } 1815 // Iterate over all the accounts and assemble which ones need further sub- 1816 // filling before the entire account range can be persisted. 
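// Roughly, three per-account markers are assembled below: needCode marks
// contracts whose bytecode is not yet in the database (queued in codeTasks),
// needState marks contracts whose storage trie still has to be downloaded
// (queued in stateTasks), and needHeal marks contracts whose interrupted
// large-storage retrieval is being resumed and will need trie healing at the
// end. Every code or state flag also bumps task.pend, and the task is only
// forwarded once pend drains back to zero.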
1817 res.task.needCode = make([]bool, len(res.accounts)) 1818 res.task.needState = make([]bool, len(res.accounts)) 1819 res.task.needHeal = make([]bool, len(res.accounts)) 1820 1821 res.task.codeTasks = make(map[common.Hash]struct{}) 1822 res.task.stateTasks = make(map[common.Hash]common.Hash) 1823 1824 resumed := make(map[common.Hash]struct{}) 1825 1826 res.task.pend = 0 1827 for i, account := range res.accounts { 1828 // Check if the account is a contract with an unknown code 1829 if !bytes.Equal(account.CodeHash, emptyCode[:]) { 1830 if !rawdb.HasCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)) { 1831 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} 1832 res.task.needCode[i] = true 1833 res.task.pend++ 1834 } 1835 } 1836 // Check if the account is a contract with an unknown storage trie 1837 if account.Root != emptyRoot { 1838 if ok, err := s.db.Has(account.Root[:]); err != nil || !ok { 1839 // If there was a previous large state retrieval in progress, 1840 // don't restart it from scratch. This happens if a sync cycle 1841 // is interrupted and resumed later. However, *do* update the 1842 // previous root hash. 1843 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok { 1844 log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root) 1845 for _, subtask := range subtasks { 1846 subtask.root = account.Root 1847 } 1848 res.task.needHeal[i] = true 1849 resumed[res.hashes[i]] = struct{}{} 1850 } else { 1851 res.task.stateTasks[res.hashes[i]] = account.Root 1852 } 1853 res.task.needState[i] = true 1854 res.task.pend++ 1855 } 1856 } 1857 } 1858 // Delete any subtasks that have been aborted but not resumed. This may undo 1859 // some progress if a new peer gives us less accounts than an old one, but for 1860 // now we have to live with that. 1861 for hash := range res.task.SubTasks { 1862 if _, ok := resumed[hash]; !ok { 1863 log.Debug("Aborting suspended storage retrieval", "account", hash) 1864 delete(res.task.SubTasks, hash) 1865 } 1866 } 1867 // If the account range contained no contracts, or all have been fully filled 1868 // beforehand, short circuit storage filling and forward to the next task 1869 if res.task.pend == 0 { 1870 s.forwardAccountTask(res.task) 1871 return 1872 } 1873 // Some accounts are incomplete, leave as is for the storage and contract 1874 // task assigners to pick up and fill. 1875 } 1876 1877 // processBytecodeResponse integrates an already validated bytecode response 1878 // into the account tasks. 
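// Delivered codes are written to the database in one batch and clear the
// matching needCode flags (each decrementing the task's pend counter), while
// any hashes the peer failed to deliver are simply re-added to codeTasks for a
// later request. If this delivery drains pend to zero, the account task is
// forwarded to its next chunk.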
1879 func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { 1880 batch := s.db.NewBatch() 1881 1882 var ( 1883 codes uint64 1884 ) 1885 for i, hash := range res.hashes { 1886 code := res.codes[i] 1887 1888 // If the bytecode was not delivered, reschedule it 1889 if code == nil { 1890 res.task.codeTasks[hash] = struct{}{} 1891 continue 1892 } 1893 // Code was delivered, mark it not needed any more 1894 for j, account := range res.task.res.accounts { 1895 if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) { 1896 res.task.needCode[j] = false 1897 res.task.pend-- 1898 } 1899 } 1900 // Push the bytecode into a database batch 1901 codes++ 1902 rawdb.WriteCode(batch, hash, code) 1903 } 1904 bytes := common.StorageSize(batch.ValueSize()) 1905 if err := batch.Write(); err != nil { 1906 log.Crit("Failed to persist bytecodes", "err", err) 1907 } 1908 s.bytecodeSynced += codes 1909 s.bytecodeBytes += bytes 1910 1911 log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes) 1912 1913 // If this delivery completed the last pending task, forward the account task 1914 // to the next chunk 1915 if res.task.pend == 0 { 1916 s.forwardAccountTask(res.task) 1917 return 1918 } 1919 // Some accounts are still incomplete, leave as is for the storage and contract 1920 // task assigners to pick up and fill. 1921 } 1922 1923 // processStorageResponse integrates an already validated storage response 1924 // into the account tasks. 1925 func (s *Syncer) processStorageResponse(res *storageResponse) { 1926 // Switch the subtask from pending to idle 1927 if res.subTask != nil { 1928 res.subTask.req = nil 1929 } 1930 batch := ethdb.HookedBatch{ 1931 Batch: s.db.NewBatch(), 1932 OnPut: func(key []byte, value []byte) { 1933 s.storageBytes += common.StorageSize(len(key) + len(value)) 1934 }, 1935 } 1936 var ( 1937 slots int 1938 oldStorageBytes = s.storageBytes 1939 ) 1940 // Iterate over all the accounts and reconstruct their storage tries from the 1941 // delivered slots 1942 for i, account := range res.accounts { 1943 // If the account was not delivered, reschedule it 1944 if i >= len(res.hashes) { 1945 res.mainTask.stateTasks[account] = res.roots[i] 1946 continue 1947 } 1948 // State was delivered, if complete mark as not needed any more, otherwise 1949 // mark the account as needing healing 1950 for j, hash := range res.mainTask.res.hashes { 1951 if account != hash { 1952 continue 1953 } 1954 acc := res.mainTask.res.accounts[j] 1955 1956 // If the packet contains multiple contract storage slots, all 1957 // but the last are surely complete. The last contract may be 1958 // chunked, so check it's continuation flag. 1959 if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { 1960 res.mainTask.needState[j] = false 1961 res.mainTask.pend-- 1962 } 1963 // If the last contract was chunked, mark it as needing healing 1964 // to avoid writing it out to disk prematurely. 
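// The heal is needed because every chunk of a large contract is committed
// through its own stack trie, so the trie nodes sitting on the boundaries
// between chunks (the gluing points mentioned further down) are never generated
// locally and can only be obtained later, during the healing phase.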
1965 if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { 1966 res.mainTask.needHeal[j] = true 1967 } 1968 // If the last contract was chunked, we need to switch to large 1969 // contract handling mode 1970 if res.subTask == nil && i == len(res.hashes)-1 && res.cont { 1971 // If we haven't yet started a large-contract retrieval, create 1972 // the subtasks for it within the main account task 1973 if tasks, ok := res.mainTask.SubTasks[account]; !ok { 1974 var ( 1975 keys = res.hashes[i] 1976 chunks = uint64(storageConcurrency) 1977 lastKey common.Hash 1978 ) 1979 if len(keys) > 0 { 1980 lastKey = keys[len(keys)-1] 1981 } 1982 // If the number of slots remaining is low, decrease the 1983 // number of chunks. Somewhere on the order of 10-15K slots 1984 // fit into a packet of 500KB. A key/slot pair is maximum 64 1985 // bytes, so pessimistically maxRequestSize/64 = 8K. 1986 // 1987 // Chunk so that at least 2 packets are needed to fill a task. 1988 if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil { 1989 if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks { 1990 chunks = n + 1 1991 } 1992 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks) 1993 } else { 1994 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks) 1995 } 1996 r := newHashRange(lastKey, chunks) 1997 1998 // Our first task is the one that was just filled by this response. 1999 batch := ethdb.HookedBatch{ 2000 Batch: s.db.NewBatch(), 2001 OnPut: func(key []byte, value []byte) { 2002 s.storageBytes += common.StorageSize(len(key) + len(value)) 2003 }, 2004 } 2005 tasks = append(tasks, &storageTask{ 2006 Next: common.Hash{}, 2007 Last: r.End(), 2008 root: acc.Root, 2009 genBatch: batch, 2010 genTrie: trie.NewStackTrieWithOwner(batch, account), 2011 }) 2012 for r.Next() { 2013 batch := ethdb.HookedBatch{ 2014 Batch: s.db.NewBatch(), 2015 OnPut: func(key []byte, value []byte) { 2016 s.storageBytes += common.StorageSize(len(key) + len(value)) 2017 }, 2018 } 2019 tasks = append(tasks, &storageTask{ 2020 Next: r.Start(), 2021 Last: r.End(), 2022 root: acc.Root, 2023 genBatch: batch, 2024 genTrie: trie.NewStackTrieWithOwner(batch, account), 2025 }) 2026 } 2027 for _, task := range tasks { 2028 log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", task.Next, "last", task.Last) 2029 } 2030 res.mainTask.SubTasks[account] = tasks 2031 2032 // Since we've just created the sub-tasks, this response 2033 // is surely for the first one (zero origin) 2034 res.subTask = tasks[0] 2035 } 2036 } 2037 // If we're in large contract delivery mode, forward the subtask 2038 if res.subTask != nil { 2039 // Ensure the response doesn't overflow into the subsequent task 2040 last := res.subTask.Last.Big() 2041 // Find the first overflowing key. 
While at it, mark res as complete 2042 // if we find the range to include or pass the 'last' 2043 index := sort.Search(len(res.hashes[i]), func(k int) bool { 2044 cmp := res.hashes[i][k].Big().Cmp(last) 2045 if cmp >= 0 { 2046 res.cont = false 2047 } 2048 return cmp > 0 2049 }) 2050 if index >= 0 { 2051 // cut off excess 2052 res.hashes[i] = res.hashes[i][:index] 2053 res.slots[i] = res.slots[i][:index] 2054 } 2055 // Forward the relevant storage chunk (even if created just now) 2056 if res.cont { 2057 res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1]) 2058 } else { 2059 res.subTask.done = true 2060 } 2061 } 2062 } 2063 // Iterate over all the complete contracts, reconstruct the trie nodes and 2064 // push them to disk. If the contract is chunked, the trie nodes will be 2065 // reconstructed later. 2066 slots += len(res.hashes[i]) 2067 2068 if i < len(res.hashes)-1 || res.subTask == nil { 2069 tr := trie.NewStackTrieWithOwner(batch, account) 2070 for j := 0; j < len(res.hashes[i]); j++ { 2071 tr.Update(res.hashes[i][j][:], res.slots[i][j]) 2072 } 2073 tr.Commit() 2074 } 2075 // Persist the received storage segments. These flat state maybe 2076 // outdated during the sync, but it can be fixed later during the 2077 // snapshot generation. 2078 for j := 0; j < len(res.hashes[i]); j++ { 2079 rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j]) 2080 2081 // If we're storing large contracts, generate the trie nodes 2082 // on the fly to not trash the gluing points 2083 if i == len(res.hashes)-1 && res.subTask != nil { 2084 res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j]) 2085 } 2086 } 2087 } 2088 // Large contracts could have generated new trie nodes, flush them to disk 2089 if res.subTask != nil { 2090 if res.subTask.done { 2091 if root, err := res.subTask.genTrie.Commit(); err != nil { 2092 log.Error("Failed to commit stack slots", "err", err) 2093 } else if root == res.subTask.root { 2094 // If the chunk's root is an overflown but full delivery, clear the heal request 2095 for i, account := range res.mainTask.res.hashes { 2096 if account == res.accounts[len(res.accounts)-1] { 2097 res.mainTask.needHeal[i] = false 2098 } 2099 } 2100 } 2101 } 2102 if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || res.subTask.done { 2103 if err := res.subTask.genBatch.Write(); err != nil { 2104 log.Error("Failed to persist stack slots", "err", err) 2105 } 2106 res.subTask.genBatch.Reset() 2107 } 2108 } 2109 // Flush anything written just now and update the stats 2110 if err := batch.Write(); err != nil { 2111 log.Crit("Failed to persist storage slots", "err", err) 2112 } 2113 s.storageSynced += uint64(slots) 2114 2115 log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes) 2116 2117 // If this delivery completed the last pending task, forward the account task 2118 // to the next chunk 2119 if res.mainTask.pend == 0 { 2120 s.forwardAccountTask(res.mainTask) 2121 return 2122 } 2123 // Some accounts are still incomplete, leave as is for the storage and contract 2124 // task assigners to pick up and fill. 2125 } 2126 2127 // processTrienodeHealResponse integrates an already validated trienode response 2128 // into the healer tasks. 
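// Delivered nodes are handed to the trie sync scheduler (misses are simply
// re-queued in trieTasks), and the time spent processing them feeds an
// exponential moving average of the local processing rate, which in turn
// drives the heal throttle. For example (illustrative numbers): with a
// measurement impact of 0.005, an old rate of 1000 nodes/s and 100 nodes just
// processed at 2000 nodes/s, the closed form used below gives
// 0.995^100*(1000-2000) + 2000, i.e. roughly 1394 nodes/s.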
2129 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { 2130 var ( 2131 start = time.Now() 2132 fills int 2133 ) 2134 for i, hash := range res.hashes { 2135 node := res.nodes[i] 2136 2137 // If the trie node was not delivered, reschedule it 2138 if node == nil { 2139 res.task.trieTasks[res.paths[i]] = res.hashes[i] 2140 continue 2141 } 2142 fills++ 2143 2144 // Push the trie node into the state syncer 2145 s.trienodeHealSynced++ 2146 s.trienodeHealBytes += common.StorageSize(len(node)) 2147 2148 err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node}) 2149 switch err { 2150 case nil: 2151 case trie.ErrAlreadyProcessed: 2152 s.trienodeHealDups++ 2153 case trie.ErrNotRequested: 2154 s.trienodeHealNops++ 2155 default: 2156 log.Error("Invalid trienode processed", "hash", hash, "err", err) 2157 } 2158 } 2159 s.commitHealer(false) 2160 2161 // Calculate the processing rate of one filled trie node 2162 rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) 2163 2164 // Update the currently measured trienode queueing and processing throughput. 2165 // 2166 // The processing rate needs to be updated uniformly independent if we've 2167 // processed 1x100 trie nodes or 100x1 to keep the rate consistent even in 2168 // the face of varying network packets. As such, we cannot just measure the 2169 // time it took to process N trie nodes and update once, we need one update 2170 // per trie node. 2171 // 2172 // Naively, that would be: 2173 // 2174 // for i:=0; i<fills; i++ { 2175 // healRate = (1-measurementImpact)*oldRate + measurementImpact*newRate 2176 // } 2177 // 2178 // Essentially, a recursive expansion of HR = (1-MI)*HR + MI*NR. 2179 // 2180 // We can expand that formula for the Nth item as: 2181 // HR(N) = (1-MI)^N*OR + (1-MI)^(N-1)*MI*NR + (1-MI)^(N-2)*MI*NR + ... 
+ (1-MI)^0*MI*NR 2182 // 2183 // The above is a geometric sequence that can be summed to: 2184 // HR(N) = (1-MI)^N*(OR-NR) + NR 2185 s.trienodeHealRate = gomath.Pow(1-trienodeHealRateMeasurementImpact, float64(fills))*(s.trienodeHealRate-rate) + rate 2186 2187 pending := atomic.LoadUint64(&s.trienodeHealPend) 2188 if time.Since(s.trienodeHealThrottled) > time.Second { 2189 // Periodically adjust the trie node throttler 2190 if float64(pending) > 2*s.trienodeHealRate { 2191 s.trienodeHealThrottle *= trienodeHealThrottleIncrease 2192 } else { 2193 s.trienodeHealThrottle /= trienodeHealThrottleDecrease 2194 } 2195 if s.trienodeHealThrottle > maxTrienodeHealThrottle { 2196 s.trienodeHealThrottle = maxTrienodeHealThrottle 2197 } else if s.trienodeHealThrottle < minTrienodeHealThrottle { 2198 s.trienodeHealThrottle = minTrienodeHealThrottle 2199 } 2200 s.trienodeHealThrottled = time.Now() 2201 2202 log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle) 2203 } 2204 } 2205 2206 func (s *Syncer) commitHealer(force bool) { 2207 if !force && s.healer.scheduler.MemSize() < ethdb.IdealBatchSize { 2208 return 2209 } 2210 batch := s.db.NewBatch() 2211 if err := s.healer.scheduler.Commit(batch); err != nil { 2212 log.Error("Failed to commit healing data", "err", err) 2213 } 2214 if err := batch.Write(); err != nil { 2215 log.Crit("Failed to persist healing data", "err", err) 2216 } 2217 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) 2218 } 2219 2220 // processBytecodeHealResponse integrates an already validated bytecode response 2221 // into the healer tasks. 2222 func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { 2223 for i, hash := range res.hashes { 2224 node := res.codes[i] 2225 2226 // If the trie node was not delivered, reschedule it 2227 if node == nil { 2228 res.task.codeTasks[hash] = struct{}{} 2229 continue 2230 } 2231 // Push the trie node into the state syncer 2232 s.bytecodeHealSynced++ 2233 s.bytecodeHealBytes += common.StorageSize(len(node)) 2234 2235 err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node}) 2236 switch err { 2237 case nil: 2238 case trie.ErrAlreadyProcessed: 2239 s.bytecodeHealDups++ 2240 case trie.ErrNotRequested: 2241 s.bytecodeHealNops++ 2242 default: 2243 log.Error("Invalid bytecode processed", "hash", hash, "err", err) 2244 } 2245 } 2246 s.commitHealer(false) 2247 } 2248 2249 // forwardAccountTask takes a filled account task and persists anything available 2250 // into the database, after which it forwards the next account marker so that the 2251 // task's next chunk may be filled. 2252 func (s *Syncer) forwardAccountTask(task *accountTask) { 2253 // Remove any pending delivery 2254 res := task.res 2255 if res == nil { 2256 return // nothing to forward 2257 } 2258 task.res = nil 2259 2260 // Persist the received account segments. These flat state maybe 2261 // outdated during the sync, but it can be fixed later during the 2262 // snapshot generation. 
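// Note that only the leading run of fully completed accounts is persisted
// here: the loop below breaks at the first account that still needs code or
// state, and the chunk marker is later advanced past exactly those accounts
// that were flushed.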
2263 oldAccountBytes := s.accountBytes 2264 2265 batch := ethdb.HookedBatch{ 2266 Batch: s.db.NewBatch(), 2267 OnPut: func(key []byte, value []byte) { 2268 s.accountBytes += common.StorageSize(len(key) + len(value)) 2269 }, 2270 } 2271 for i, hash := range res.hashes { 2272 if task.needCode[i] || task.needState[i] { 2273 break 2274 } 2275 slim := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash) 2276 rawdb.WriteAccountSnapshot(batch, hash, slim) 2277 2278 // If the task is complete, drop it into the stack trie to generate 2279 // account trie nodes for it 2280 if !task.needHeal[i] { 2281 full, err := snapshot.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted 2282 if err != nil { 2283 panic(err) // Really shouldn't ever happen 2284 } 2285 task.genTrie.Update(hash[:], full) 2286 } 2287 } 2288 // Flush anything written just now and update the stats 2289 if err := batch.Write(); err != nil { 2290 log.Crit("Failed to persist accounts", "err", err) 2291 } 2292 s.accountSynced += uint64(len(res.accounts)) 2293 2294 // Task filling persisted, push the chunk marker forward to the first 2295 // account still missing data. 2296 for i, hash := range res.hashes { 2297 if task.needCode[i] || task.needState[i] { 2298 return 2299 } 2300 task.Next = incHash(hash) 2301 } 2302 // All accounts marked as complete, track if the entire task is done 2303 task.done = !res.cont 2304 2305 // Stack trie could have generated trie nodes, push them to disk (we need to 2306 // flush after finalizing task.done). It's fine even if we crash and lose this 2307 // write as it will only cause more data to be downloaded during heal. 2308 if task.done { 2309 if _, err := task.genTrie.Commit(); err != nil { 2310 log.Error("Failed to commit stack account", "err", err) 2311 } 2312 } 2313 if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done { 2314 if err := task.genBatch.Write(); err != nil { 2315 log.Error("Failed to persist stack account", "err", err) 2316 } 2317 task.genBatch.Reset() 2318 } 2319 log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes) 2320 } 2321 2322 // OnAccounts is a callback method to invoke when a range of accounts are 2323 // received from a remote peer. 2324 func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { 2325 size := common.StorageSize(len(hashes) * common.HashLength) 2326 for _, account := range accounts { 2327 size += common.StorageSize(len(account)) 2328 } 2329 for _, node := range proof { 2330 size += common.StorageSize(len(node)) 2331 } 2332 logger := peer.Log().New("reqid", id) 2333 logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) 2334 2335 // Whether or not the response is valid, we can mark the peer as idle and 2336 // notify the scheduler to assign a new task. If the response is invalid, 2337 // we'll drop the peer in a bit.
2338 defer func() { 2339 s.lock.Lock() 2340 defer s.lock.Unlock() 2341 if _, ok := s.peers[peer.ID()]; ok { 2342 s.accountIdlers[peer.ID()] = struct{}{} 2343 } 2344 select { 2345 case s.update <- struct{}{}: 2346 default: 2347 } 2348 }() 2349 s.lock.Lock() 2350 // Ensure the response is for a valid request 2351 req, ok := s.accountReqs[id] 2352 if !ok { 2353 // Request stale, perhaps the peer timed out but came through in the end 2354 logger.Warn("Unexpected account range packet") 2355 s.lock.Unlock() 2356 return nil 2357 } 2358 delete(s.accountReqs, id) 2359 s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size)) 2360 2361 // Clean up the request timeout timer, we'll see how to proceed further based 2362 // on the actual delivered content 2363 if !req.timeout.Stop() { 2364 // The timeout is already triggered, and this request will be reverted+rescheduled 2365 s.lock.Unlock() 2366 return nil 2367 } 2368 // Response is valid, but check if peer is signalling that it does not have 2369 // the requested data. For account range queries that means the state being 2370 // retrieved was either already pruned remotely, or the peer is not yet 2371 // synced to our head. 2372 if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { 2373 logger.Debug("Peer rejected account range request", "root", s.root) 2374 s.statelessPeers[peer.ID()] = struct{}{} 2375 s.lock.Unlock() 2376 2377 // Signal this request as failed, and ready for rescheduling 2378 s.scheduleRevertAccountRequest(req) 2379 return nil 2380 } 2381 root := s.root 2382 s.lock.Unlock() 2383 2384 // Reconstruct a partial trie from the response and verify it 2385 keys := make([][]byte, len(hashes)) 2386 for i, key := range hashes { 2387 keys[i] = common.CopyBytes(key[:]) 2388 } 2389 nodes := make(light.NodeList, len(proof)) 2390 for i, node := range proof { 2391 nodes[i] = node 2392 } 2393 proofdb := nodes.NodeSet() 2394 2395 var end []byte 2396 if len(keys) > 0 { 2397 end = keys[len(keys)-1] 2398 } 2399 cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb) 2400 if err != nil { 2401 logger.Warn("Account range failed proof", "err", err) 2402 // Signal this request as failed, and ready for rescheduling 2403 s.scheduleRevertAccountRequest(req) 2404 return err 2405 } 2406 accs := make([]*types.StateAccount, len(accounts)) 2407 for i, account := range accounts { 2408 acc := new(types.StateAccount) 2409 if err := rlp.DecodeBytes(account, acc); err != nil { 2410 panic(err) // We created these blobs, we must be able to decode them 2411 } 2412 accs[i] = acc 2413 } 2414 response := &accountResponse{ 2415 task: req.task, 2416 hashes: hashes, 2417 accounts: accs, 2418 cont: cont, 2419 } 2420 select { 2421 case req.deliver <- response: 2422 case <-req.cancel: 2423 case <-req.stale: 2424 } 2425 return nil 2426 } 2427 2428 // OnByteCodes is a callback method to invoke when a batch of contract 2429 // bytes codes are received from a remote peer. 2430 func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2431 s.lock.RLock() 2432 syncing := !s.snapped 2433 s.lock.RUnlock() 2434 2435 if syncing { 2436 return s.onByteCodes(peer, id, bytecodes) 2437 } 2438 return s.onHealByteCodes(peer, id, bytecodes) 2439 } 2440 2441 // onByteCodes is a callback method to invoke when a batch of contract 2442 // bytes codes are received from a remote peer in the syncing phase. 
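// The response may contain fewer codes than were requested: the peer is free
// to skip codes it does not have, as long as the ones it does return follow
// the requested order. The handler below therefore walks the requested hashes
// and the delivered codes with two cursors, leaving nil gaps for misses, e.g.
// requested [h1 h2 h3 h4] answered with the codes for h1 and h3 yields
// codes = [c1, nil, c3, nil].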
2443 func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2444 var size common.StorageSize 2445 for _, code := range bytecodes { 2446 size += common.StorageSize(len(code)) 2447 } 2448 logger := peer.Log().New("reqid", id) 2449 logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2450 2451 // Whether or not the response is valid, we can mark the peer as idle and 2452 // notify the scheduler to assign a new task. If the response is invalid, 2453 // we'll drop the peer in a bit. 2454 defer func() { 2455 s.lock.Lock() 2456 defer s.lock.Unlock() 2457 if _, ok := s.peers[peer.ID()]; ok { 2458 s.bytecodeIdlers[peer.ID()] = struct{}{} 2459 } 2460 select { 2461 case s.update <- struct{}{}: 2462 default: 2463 } 2464 }() 2465 s.lock.Lock() 2466 // Ensure the response is for a valid request 2467 req, ok := s.bytecodeReqs[id] 2468 if !ok { 2469 // Request stale, perhaps the peer timed out but came through in the end 2470 logger.Warn("Unexpected bytecode packet") 2471 s.lock.Unlock() 2472 return nil 2473 } 2474 delete(s.bytecodeReqs, id) 2475 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes)) 2476 2477 // Clean up the request timeout timer, we'll see how to proceed further based 2478 // on the actual delivered content 2479 if !req.timeout.Stop() { 2480 // The timeout is already triggered, and this request will be reverted+rescheduled 2481 s.lock.Unlock() 2482 return nil 2483 } 2484 2485 // Response is valid, but check if peer is signalling that it does not have 2486 // the requested data. For bytecode range queries that means the peer is not 2487 // yet synced. 2488 if len(bytecodes) == 0 { 2489 logger.Debug("Peer rejected bytecode request") 2490 s.statelessPeers[peer.ID()] = struct{}{} 2491 s.lock.Unlock() 2492 2493 // Signal this request as failed, and ready for rescheduling 2494 s.scheduleRevertBytecodeRequest(req) 2495 return nil 2496 } 2497 s.lock.Unlock() 2498 2499 // Cross reference the requested bytecodes with the response to find gaps 2500 // that the serving node is missing 2501 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2502 hash := make([]byte, 32) 2503 2504 codes := make([][]byte, len(req.hashes)) 2505 for i, j := 0, 0; i < len(bytecodes); i++ { 2506 // Find the next hash that we've been served, leaving misses with nils 2507 hasher.Reset() 2508 hasher.Write(bytecodes[i]) 2509 hasher.Read(hash) 2510 2511 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2512 j++ 2513 } 2514 if j < len(req.hashes) { 2515 codes[j] = bytecodes[i] 2516 j++ 2517 continue 2518 } 2519 // We've either ran out of hashes, or got unrequested data 2520 logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i) 2521 // Signal this request as failed, and ready for rescheduling 2522 s.scheduleRevertBytecodeRequest(req) 2523 return errors.New("unexpected bytecode") 2524 } 2525 // Response validated, send it to the scheduler for filling 2526 response := &bytecodeResponse{ 2527 task: req.task, 2528 hashes: req.hashes, 2529 codes: codes, 2530 } 2531 select { 2532 case req.deliver <- response: 2533 case <-req.cancel: 2534 case <-req.stale: 2535 } 2536 return nil 2537 } 2538 2539 // OnStorage is a callback method to invoke when ranges of storage slots 2540 // are received from a remote peer. 
2541 func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { 2542 // Gather some trace stats to aid in debugging issues 2543 var ( 2544 hashCount int 2545 slotCount int 2546 size common.StorageSize 2547 ) 2548 for _, hashset := range hashes { 2549 size += common.StorageSize(common.HashLength * len(hashset)) 2550 hashCount += len(hashset) 2551 } 2552 for _, slotset := range slots { 2553 for _, slot := range slotset { 2554 size += common.StorageSize(len(slot)) 2555 } 2556 slotCount += len(slotset) 2557 } 2558 for _, node := range proof { 2559 size += common.StorageSize(len(node)) 2560 } 2561 logger := peer.Log().New("reqid", id) 2562 logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) 2563 2564 // Whether or not the response is valid, we can mark the peer as idle and 2565 // notify the scheduler to assign a new task. If the response is invalid, 2566 // we'll drop the peer in a bit. 2567 defer func() { 2568 s.lock.Lock() 2569 defer s.lock.Unlock() 2570 if _, ok := s.peers[peer.ID()]; ok { 2571 s.storageIdlers[peer.ID()] = struct{}{} 2572 } 2573 select { 2574 case s.update <- struct{}{}: 2575 default: 2576 } 2577 }() 2578 s.lock.Lock() 2579 // Ensure the response is for a valid request 2580 req, ok := s.storageReqs[id] 2581 if !ok { 2582 // Request stale, perhaps the peer timed out but came through in the end 2583 logger.Warn("Unexpected storage ranges packet") 2584 s.lock.Unlock() 2585 return nil 2586 } 2587 delete(s.storageReqs, id) 2588 s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size)) 2589 2590 // Clean up the request timeout timer, we'll see how to proceed further based 2591 // on the actual delivered content 2592 if !req.timeout.Stop() { 2593 // The timeout is already triggered, and this request will be reverted+rescheduled 2594 s.lock.Unlock() 2595 return nil 2596 } 2597 2598 // Reject the response if the hash sets and slot sets don't match, or if the 2599 // peer sent more data than requested. 2600 if len(hashes) != len(slots) { 2601 s.lock.Unlock() 2602 s.scheduleRevertStorageRequest(req) // reschedule request 2603 logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) 2604 return errors.New("hash and slot set size mismatch") 2605 } 2606 if len(hashes) > len(req.accounts) { 2607 s.lock.Unlock() 2608 s.scheduleRevertStorageRequest(req) // reschedule request 2609 logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) 2610 return errors.New("hash set larger than requested") 2611 } 2612 // Response is valid, but check if peer is signalling that it does not have 2613 // the requested data. For storage range queries that means the state being 2614 // retrieved was either already pruned remotely, or the peer is not yet 2615 // synced to our head. 
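// An entirely empty response is treated as such a rejection below and the peer
// is marked stateless. For non-empty responses, only the last account's slot
// range may be partial: it alone is accompanied by a Merkle proof, every
// earlier range has to be complete and verify against its storage root on its
// own, and the continuation flag therefore only ever refers to that last range.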
2616 if len(hashes) == 0 { 2617 logger.Debug("Peer rejected storage request") 2618 s.statelessPeers[peer.ID()] = struct{}{} 2619 s.lock.Unlock() 2620 s.scheduleRevertStorageRequest(req) // reschedule request 2621 return nil 2622 } 2623 s.lock.Unlock() 2624 2625 // Reconstruct the partial tries from the response and verify them 2626 var cont bool 2627 2628 for i := 0; i < len(hashes); i++ { 2629 // Convert the keys and proofs into an internal format 2630 keys := make([][]byte, len(hashes[i])) 2631 for j, key := range hashes[i] { 2632 keys[j] = common.CopyBytes(key[:]) 2633 } 2634 nodes := make(light.NodeList, 0, len(proof)) 2635 if i == len(hashes)-1 { 2636 for _, node := range proof { 2637 nodes = append(nodes, node) 2638 } 2639 } 2640 var err error 2641 if len(nodes) == 0 { 2642 // No proof has been attached, the response must cover the entire key 2643 // space and hash to the origin root. 2644 _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) 2645 if err != nil { 2646 s.scheduleRevertStorageRequest(req) // reschedule request 2647 logger.Warn("Storage slots failed proof", "err", err) 2648 return err 2649 } 2650 } else { 2651 // A proof was attached, the response is only partial, check that the 2652 // returned data is indeed part of the storage trie 2653 proofdb := nodes.NodeSet() 2654 2655 var end []byte 2656 if len(keys) > 0 { 2657 end = keys[len(keys)-1] 2658 } 2659 cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) 2660 if err != nil { 2661 s.scheduleRevertStorageRequest(req) // reschedule request 2662 logger.Warn("Storage range failed proof", "err", err) 2663 return err 2664 } 2665 } 2666 } 2667 // Partial tries reconstructed, send them to the scheduler for storage filling 2668 response := &storageResponse{ 2669 mainTask: req.mainTask, 2670 subTask: req.subTask, 2671 accounts: req.accounts, 2672 roots: req.roots, 2673 hashes: hashes, 2674 slots: slots, 2675 cont: cont, 2676 } 2677 select { 2678 case req.deliver <- response: 2679 case <-req.cancel: 2680 case <-req.stale: 2681 } 2682 return nil 2683 } 2684 2685 // OnTrieNodes is a callback method to invoke when a batch of trie nodes 2686 // are received from a remote peer. 2687 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2688 var size common.StorageSize 2689 for _, node := range trienodes { 2690 size += common.StorageSize(len(node)) 2691 } 2692 logger := peer.Log().New("reqid", id) 2693 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2694 2695 // Whether or not the response is valid, we can mark the peer as idle and 2696 // notify the scheduler to assign a new task. If the response is invalid, 2697 // we'll drop the peer in a bit. 
2698 defer func() { 2699 s.lock.Lock() 2700 defer s.lock.Unlock() 2701 if _, ok := s.peers[peer.ID()]; ok { 2702 s.trienodeHealIdlers[peer.ID()] = struct{}{} 2703 } 2704 select { 2705 case s.update <- struct{}{}: 2706 default: 2707 } 2708 }() 2709 s.lock.Lock() 2710 // Ensure the response is for a valid request 2711 req, ok := s.trienodeHealReqs[id] 2712 if !ok { 2713 // Request stale, perhaps the peer timed out but came through in the end 2714 logger.Warn("Unexpected trienode heal packet") 2715 s.lock.Unlock() 2716 return nil 2717 } 2718 delete(s.trienodeHealReqs, id) 2719 s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes)) 2720 2721 // Clean up the request timeout timer, we'll see how to proceed further based 2722 // on the actual delivered content 2723 if !req.timeout.Stop() { 2724 // The timeout is already triggered, and this request will be reverted+rescheduled 2725 s.lock.Unlock() 2726 return nil 2727 } 2728 2729 // Response is valid, but check if peer is signalling that it does not have 2730 // the requested data. For bytecode range queries that means the peer is not 2731 // yet synced. 2732 if len(trienodes) == 0 { 2733 logger.Debug("Peer rejected trienode heal request") 2734 s.statelessPeers[peer.ID()] = struct{}{} 2735 s.lock.Unlock() 2736 2737 // Signal this request as failed, and ready for rescheduling 2738 s.scheduleRevertTrienodeHealRequest(req) 2739 return nil 2740 } 2741 s.lock.Unlock() 2742 2743 // Cross reference the requested trienodes with the response to find gaps 2744 // that the serving node is missing 2745 var ( 2746 hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState) 2747 hash = make([]byte, 32) 2748 nodes = make([][]byte, len(req.hashes)) 2749 fills uint64 2750 ) 2751 for i, j := 0, 0; i < len(trienodes); i++ { 2752 // Find the next hash that we've been served, leaving misses with nils 2753 hasher.Reset() 2754 hasher.Write(trienodes[i]) 2755 hasher.Read(hash) 2756 2757 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2758 j++ 2759 } 2760 if j < len(req.hashes) { 2761 nodes[j] = trienodes[i] 2762 fills++ 2763 j++ 2764 continue 2765 } 2766 // We've either ran out of hashes, or got unrequested data 2767 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) 2768 2769 // Signal this request as failed, and ready for rescheduling 2770 s.scheduleRevertTrienodeHealRequest(req) 2771 return errors.New("unexpected healing trienode") 2772 } 2773 // Response validated, send it to the scheduler for filling 2774 atomic.AddUint64(&s.trienodeHealPend, fills) 2775 defer func() { 2776 atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1)) 2777 }() 2778 response := &trienodeHealResponse{ 2779 paths: req.paths, 2780 task: req.task, 2781 hashes: req.hashes, 2782 nodes: nodes, 2783 } 2784 select { 2785 case req.deliver <- response: 2786 case <-req.cancel: 2787 case <-req.stale: 2788 } 2789 return nil 2790 } 2791 2792 // onHealByteCodes is a callback method to invoke when a batch of contract 2793 // bytes codes are received from a remote peer in the healing phase. 
2794 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2795 var size common.StorageSize 2796 for _, code := range bytecodes { 2797 size += common.StorageSize(len(code)) 2798 } 2799 logger := peer.Log().New("reqid", id) 2800 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2801 2802 // Whether or not the response is valid, we can mark the peer as idle and 2803 // notify the scheduler to assign a new task. If the response is invalid, 2804 // we'll drop the peer in a bit. 2805 defer func() { 2806 s.lock.Lock() 2807 defer s.lock.Unlock() 2808 if _, ok := s.peers[peer.ID()]; ok { 2809 s.bytecodeHealIdlers[peer.ID()] = struct{}{} 2810 } 2811 select { 2812 case s.update <- struct{}{}: 2813 default: 2814 } 2815 }() 2816 s.lock.Lock() 2817 // Ensure the response is for a valid request 2818 req, ok := s.bytecodeHealReqs[id] 2819 if !ok { 2820 // Request stale, perhaps the peer timed out but came through in the end 2821 logger.Warn("Unexpected bytecode heal packet") 2822 s.lock.Unlock() 2823 return nil 2824 } 2825 delete(s.bytecodeHealReqs, id) 2826 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes)) 2827 2828 // Clean up the request timeout timer, we'll see how to proceed further based 2829 // on the actual delivered content 2830 if !req.timeout.Stop() { 2831 // The timeout is already triggered, and this request will be reverted+rescheduled 2832 s.lock.Unlock() 2833 return nil 2834 } 2835 2836 // Response is valid, but check if peer is signalling that it does not have 2837 // the requested data. For bytecode range queries that means the peer is not 2838 // yet synced. 2839 if len(bytecodes) == 0 { 2840 logger.Debug("Peer rejected bytecode heal request") 2841 s.statelessPeers[peer.ID()] = struct{}{} 2842 s.lock.Unlock() 2843 2844 // Signal this request as failed, and ready for rescheduling 2845 s.scheduleRevertBytecodeHealRequest(req) 2846 return nil 2847 } 2848 s.lock.Unlock() 2849 2850 // Cross reference the requested bytecodes with the response to find gaps 2851 // that the serving node is missing 2852 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2853 hash := make([]byte, 32) 2854 2855 codes := make([][]byte, len(req.hashes)) 2856 for i, j := 0, 0; i < len(bytecodes); i++ { 2857 // Find the next hash that we've been served, leaving misses with nils 2858 hasher.Reset() 2859 hasher.Write(bytecodes[i]) 2860 hasher.Read(hash) 2861 2862 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2863 j++ 2864 } 2865 if j < len(req.hashes) { 2866 codes[j] = bytecodes[i] 2867 j++ 2868 continue 2869 } 2870 // We've either ran out of hashes, or got unrequested data 2871 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) 2872 // Signal this request as failed, and ready for rescheduling 2873 s.scheduleRevertBytecodeHealRequest(req) 2874 return errors.New("unexpected healing bytecode") 2875 } 2876 // Response validated, send it to the scheduler for filling 2877 response := &bytecodeHealResponse{ 2878 task: req.task, 2879 hashes: req.hashes, 2880 codes: codes, 2881 } 2882 select { 2883 case req.deliver <- response: 2884 case <-req.cancel: 2885 case <-req.stale: 2886 } 2887 return nil 2888 } 2889 2890 // onHealState is a callback method to invoke when a flat state(account 2891 // or storage slot) is downloaded during the healing stage. The flat states 2892 // can be persisted blindly and can be fixed later in the generation stage. 
2893 // Note it's not concurrent safe, please handle the concurrent issue outside. 2894 func (s *Syncer) onHealState(paths [][]byte, value []byte) error { 2895 if len(paths) == 1 { 2896 var account types.StateAccount 2897 if err := rlp.DecodeBytes(value, &account); err != nil { 2898 return nil // Returning the error here would drop the remote peer 2899 } 2900 blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash) 2901 rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) 2902 s.accountHealed += 1 2903 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) 2904 } 2905 if len(paths) == 2 { 2906 rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) 2907 s.storageHealed += 1 2908 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) 2909 } 2910 if s.stateWriter.ValueSize() > ethdb.IdealBatchSize { 2911 s.stateWriter.Write() // It's fine to ignore the error here 2912 s.stateWriter.Reset() 2913 } 2914 return nil 2915 } 2916 2917 // hashSpace is the total size of the 256 bit hash space for accounts. 2918 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) 2919 2920 // report calculates various status reports and provides it to the user. 2921 func (s *Syncer) report(force bool) { 2922 if len(s.tasks) > 0 { 2923 s.reportSyncProgress(force) 2924 return 2925 } 2926 s.reportHealProgress(force) 2927 } 2928 2929 // reportSyncProgress calculates various status reports and provides it to the user. 2930 func (s *Syncer) reportSyncProgress(force bool) { 2931 // Don't report all the events, just occasionally 2932 if !force && time.Since(s.logTime) < 8*time.Second { 2933 return 2934 } 2935 // Don't report anything until we have a meaningful progress 2936 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes 2937 if synced == 0 { 2938 return 2939 } 2940 accountGaps := new(big.Int) 2941 for _, task := range s.tasks { 2942 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big())) 2943 } 2944 accountFills := new(big.Int).Sub(hashSpace, accountGaps) 2945 if accountFills.BitLen() == 0 { 2946 return 2947 } 2948 s.logTime = time.Now() 2949 estBytes := float64(new(big.Int).Div( 2950 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), 2951 accountFills, 2952 ).Uint64()) 2953 // Don't report anything until we have a meaningful progress 2954 if estBytes < 1.0 { 2955 return 2956 } 2957 elapsed := time.Since(s.startTime) 2958 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes) 2959 2960 // Create a mega progress report 2961 var ( 2962 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes) 2963 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountSynced), s.accountBytes.TerminalString()) 2964 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageSynced), s.storageBytes.TerminalString()) 2965 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeSynced), s.bytecodeBytes.TerminalString()) 2966 ) 2967 log.Info("Syncing: state download in progress", "synced", progress, "state", synced, 2968 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) 2969 } 2970 2971 // reportHealProgress calculates various status reports and provides it to the user. 
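// A quick illustration of the sync-progress extrapolation above: if the gaps
// still left in the account tasks cover 75% of the hash space, accountFills is
// the remaining 25%, the downloaded bytes are extrapolated to estBytes = 4x
// synced, the reported progress is 25%, and the ETA comes out at roughly three
// times the time elapsed so far.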
2972 func (s *Syncer) reportHealProgress(force bool) { 2973 // Don't report all the events, just occasionally 2974 if !force && time.Since(s.logTime) < 8*time.Second { 2975 return 2976 } 2977 s.logTime = time.Now() 2978 2979 // Create a mega progress report 2980 var ( 2981 trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString()) 2982 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString()) 2983 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString()) 2984 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString()) 2985 ) 2986 log.Info("Syncing: state healing in progress", "accounts", accounts, "slots", storage, 2987 "codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending()) 2988 } 2989 2990 // estimateRemainingSlots tries to determine roughly how many slots are left in 2991 // a contract storage, based on the number of keys and the last hash. This method 2992 // assumes that the hashes are lexicographically ordered and evenly distributed. 2993 func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) { 2994 if last == (common.Hash{}) { 2995 return 0, errors.New("last hash empty") 2996 } 2997 space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes))) 2998 space.Div(space, last.Big()) 2999 if !space.IsUint64() { 3000 // Gigantic address space probably due to too few or malicious slots 3001 return 0, errors.New("too few slots for estimation") 3002 } 3003 return space.Uint64() - uint64(hashes), nil 3004 } 3005 3006 // capacitySort implements the Sort interface, allowing sorting by peer message 3007 // throughput. Note, callers should use sort.Reverse to get the desired effect 3008 // of highest capacity being at the front. 3009 type capacitySort struct { 3010 ids []string 3011 caps []int 3012 } 3013 3014 func (s *capacitySort) Len() int { 3015 return len(s.ids) 3016 } 3017 3018 func (s *capacitySort) Less(i, j int) bool { 3019 return s.caps[i] < s.caps[j] 3020 } 3021 3022 func (s *capacitySort) Swap(i, j int) { 3023 s.ids[i], s.ids[j] = s.ids[j], s.ids[i] 3024 s.caps[i], s.caps[j] = s.caps[j], s.caps[i] 3025 } 3026 3027 // healRequestSort implements the Sort interface, allowing sorting trienode 3028 // heal requests, which is a prerequisite for merging storage-requests. 3029 type healRequestSort struct { 3030 paths []string 3031 hashes []common.Hash 3032 syncPaths []trie.SyncPath 3033 } 3034 3035 func (t *healRequestSort) Len() int { 3036 return len(t.hashes) 3037 } 3038 3039 func (t *healRequestSort) Less(i, j int) bool { 3040 a := t.syncPaths[i] 3041 b := t.syncPaths[j] 3042 switch bytes.Compare(a[0], b[0]) { 3043 case -1: 3044 return true 3045 case 1: 3046 return false 3047 } 3048 // identical first part 3049 if len(a) < len(b) { 3050 return true 3051 } 3052 if len(b) < len(a) { 3053 return false 3054 } 3055 if len(a) == 2 { 3056 return bytes.Compare(a[1], b[1]) < 0 3057 } 3058 return false 3059 } 3060 3061 func (t *healRequestSort) Swap(i, j int) { 3062 t.paths[i], t.paths[j] = t.paths[j], t.paths[i] 3063 t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i] 3064 t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i] 3065 } 3066 3067 // Merge merges the pathsets, so that several storage requests concerning the 3068 // same account are merged into one, to reduce bandwidth. 
3069 // OBS: This operation is moot if t has not first been sorted. 3070 func (t *healRequestSort) Merge() []TrieNodePathSet { 3071 var result []TrieNodePathSet 3072 for _, path := range t.syncPaths { 3073 pathset := TrieNodePathSet([][]byte(path)) 3074 if len(path) == 1 { 3075 // It's an account reference. 3076 result = append(result, pathset) 3077 } else { 3078 // It's a storage reference. 3079 end := len(result) - 1 3080 if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) { 3081 // The account doesn't match last, create a new entry. 3082 result = append(result, pathset) 3083 } else { 3084 // It's the same account as the previous one, add to the storage 3085 // paths of that request. 3086 result[end] = append(result[end], pathset[1]) 3087 } 3088 } 3089 } 3090 return result 3091 } 3092 3093 // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates 3094 // the TrieNodePaths and merges paths which belongs to the same account path. 3095 func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) { 3096 var syncPaths []trie.SyncPath 3097 for _, path := range paths { 3098 syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path))) 3099 } 3100 n := &healRequestSort{paths, hashes, syncPaths} 3101 sort.Sort(n) 3102 pathsets := n.Merge() 3103 return n.paths, n.hashes, n.syncPaths, pathsets 3104 }
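// exampleCapacitySortUsage is a minimal illustrative sketch, not called
// anywhere by the syncer itself, and the peer ids and capacities below are
// made-up values. It shows how capacitySort is meant to be combined with
// sort.Reverse so that the highest-capacity peers end up at the front of an
// idler list.
func exampleCapacitySortUsage() []string {
	idlers := &capacitySort{
		ids:  []string{"peer-a", "peer-b", "peer-c"},
		caps: []int{10, 300, 40},
	}
	// capacitySort orders ascending by capacity, so reverse it to get the
	// fastest peers first.
	sort.Sort(sort.Reverse(idlers))
	// idlers.ids is now ["peer-b", "peer-c", "peer-a"].
	return idlers.ids
}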