github.com/theQRL/go-zond@v0.1.1/zond/protocols/snap/sync.go

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snap

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	gomath "math"
	"math/big"
	"math/rand"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/theQRL/go-zond/common"
	"github.com/theQRL/go-zond/common/math"
	"github.com/theQRL/go-zond/core/rawdb"
	"github.com/theQRL/go-zond/core/state"
	"github.com/theQRL/go-zond/core/types"
	"github.com/theQRL/go-zond/crypto"
	"github.com/theQRL/go-zond/zonddb"
	"github.com/theQRL/go-zond/event"
	"github.com/theQRL/go-zond/light"
	"github.com/theQRL/go-zond/log"
	"github.com/theQRL/go-zond/p2p/msgrate"
	"github.com/theQRL/go-zond/rlp"
	"github.com/theQRL/go-zond/trie"
	"golang.org/x/crypto/sha3"
)

const (
	// minRequestSize is the minimum number of bytes to request from a remote peer.
	// This number is used as the low cap for account and storage range requests.
	// Bytecode and trienode are limited inherently by item count (1).
	minRequestSize = 64 * 1024

	// maxRequestSize is the maximum number of bytes to request from a remote peer.
	// This number is used as the high cap for account and storage range requests.
	// Bytecode and trienode are limited more explicitly by the caps below.
	maxRequestSize = 512 * 1024

	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
	// single query. If this number is too low, we're not filling responses fully
	// and waste round trip times. If it's too high, we're capping responses and
	// waste bandwidth.
	//
	// Deployed bytecodes are currently capped at 24KB, so the minimum request
	// size should be maxRequestSize / 24K. Assuming that most contracts do not
	// come close to that, requesting 4x should be a good approximation.
	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4

	// maxTrieRequestCount is the maximum number of trie node blobs to request in
	// a single query. If this number is too low, we're not filling responses fully
	// and waste round trip times. If it's too high, we're capping responses and
	// waste bandwidth.
	maxTrieRequestCount = maxRequestSize / 512

	// trienodeHealRateMeasurementImpact is the impact a single measurement has on
	// the local node's trienode processing capacity. A value closer to 0 reacts
	// slower to sudden changes, but it is also more stable against temporary hiccups.
	trienodeHealRateMeasurementImpact = 0.005

	// minTrienodeHealThrottle is the minimum divisor for throttling trie node
	// heal requests to avoid overloading the local node and excessively expanding
	// the state trie breadth wise.
	minTrienodeHealThrottle = 1

	// maxTrienodeHealThrottle is the maximum divisor for throttling trie node
	// heal requests to avoid overloading the local node and excessively expanding
	// the state trie breadth wise.
	maxTrienodeHealThrottle = maxTrieRequestCount

	// trienodeHealThrottleIncrease is the multiplier for the throttle when the
	// rate of arriving data is higher than the rate of processing it.
	trienodeHealThrottleIncrease = 1.33

	// trienodeHealThrottleDecrease is the divisor for the throttle when the
	// rate of arriving data is lower than the rate of processing it.
	trienodeHealThrottleDecrease = 1.25
)
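// The three throttle constants above form a multiplicative-increase /
// multiplicative-decrease controller. The actual update logic lives further
// down in this file (outside this excerpt); the sketch below is purely
// illustrative of how a divisor clamped to [minTrienodeHealThrottle,
// maxTrienodeHealThrottle] reacts to the measured rates (function name and
// parameters are hypothetical):
//
//	func exampleThrottleStep(throttle, arrivalRate, processRate float64) float64 {
//		if arrivalRate > processRate {
//			throttle *= trienodeHealThrottleIncrease // data piles up, request less per query
//		} else {
//			throttle /= trienodeHealThrottleDecrease // headroom available, request more
//		}
//		return gomath.Min(gomath.Max(throttle, minTrienodeHealThrottle), maxTrienodeHealThrottle)
//	}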
var (
	// accountConcurrency is the number of chunks to split the account trie into
	// to allow concurrent retrievals.
	accountConcurrency = 16

	// storageConcurrency is the number of chunks to split a large contract
	// storage trie into to allow concurrent retrievals.
	storageConcurrency = 16
)

// ErrCancelled is returned from snap syncing if the operation was prematurely
// terminated.
var ErrCancelled = errors.New("sync cancelled")

// accountRequest tracks a pending account range request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: account requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type accountRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *accountResponse // Channel to deliver successful response on
	revert  chan *accountRequest  // Channel to deliver request failure on
	cancel  chan struct{}         // Channel to track sync cancellation
	timeout *time.Timer           // Timer to track delivery timeout
	stale   chan struct{}         // Channel to signal the request was dropped

	origin common.Hash // First account requested to allow continuation checks
	limit  common.Hash // Last account requested to allow non-overlapping chunking

	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
}

// accountResponse is an already Merkle-verified remote response to an account
// range request. It contains the subtrie for the requested account range and
// the database that's going to be filled with the internal nodes on commit.
type accountResponse struct {
	task *accountTask // Task which this request is filling

	hashes   []common.Hash         // Account hashes in the returned range
	accounts []*types.StateAccount // Expanded accounts in the returned range

	cont bool // Whether the account range has a continuation
}

// bytecodeRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *bytecodeResponse // Channel to deliver successful response on
	revert  chan *bytecodeRequest  // Channel to deliver request failure on
	cancel  chan struct{}          // Channel to track sync cancellation
	timeout *time.Timer            // Timer to track delivery timeout
	stale   chan struct{}          // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeResponse is an already verified remote response to a bytecode request.
type bytecodeResponse struct {
	task *accountTask // Task which this request is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}
// storageRequest tracks a pending storage ranges request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: storage requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. tasks). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type storageRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *storageResponse // Channel to deliver successful response on
	revert  chan *storageRequest  // Channel to deliver request failure on
	cancel  chan struct{}         // Channel to track sync cancellation
	timeout *time.Timer           // Timer to track delivery timeout
	stale   chan struct{}         // Channel to signal the request was dropped

	accounts []common.Hash // Account hashes to validate responses
	roots    []common.Hash // Storage roots to validate responses

	origin common.Hash // First storage slot requested to allow continuation checks
	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking

	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
}
// storageResponse is an already Merkle-verified remote response to a storage
// range request. It contains the subtries for the requested storage ranges and
// the databases that are going to be filled with the internal nodes on commit.
type storageResponse struct {
	mainTask *accountTask // Task which this response belongs to
	subTask  *storageTask // Task which this response is filling

	accounts []common.Hash // Account hashes requested, may be only partially filled
	roots    []common.Hash // Storage roots requested, may be only partially filled

	hashes [][]common.Hash // Storage slot hashes in the returned range
	slots  [][][]byte      // Storage slot values in the returned range

	cont bool // Whether the last storage range has a continuation
}

// trienodeHealRequest tracks a pending state trie request to ensure responses
// are to actual requests and to validate any security constraints.
//
// Concurrency note: trie node requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type trienodeHealRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *trienodeHealResponse // Channel to deliver successful response on
	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
	cancel  chan struct{}              // Channel to track sync cancellation
	timeout *time.Timer                // Timer to track delivery timeout
	stale   chan struct{}              // Channel to signal the request was dropped

	paths  []string      // Trie node paths for identifying trie node
	hashes []common.Hash // Trie node hashes to validate responses

	task *healTask // Task which this request is filling (only access fields through the runloop!!)
}

// trienodeHealResponse is an already verified remote response to a trie node request.
type trienodeHealResponse struct {
	task *healTask // Task which this request is filling

	paths  []string      // Paths of the trie nodes
	hashes []common.Hash // Hashes of the trie nodes to avoid double hashing
	nodes  [][]byte      // Actual trie nodes to store into the database (nil = missing)
}
// bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeHealRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
	cancel  chan struct{}              // Channel to track sync cancellation
	timeout *time.Timer                // Timer to track delivery timeout
	stale   chan struct{}              // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeHealResponse is an already verified remote response to a bytecode request.
type bytecodeHealResponse struct {
	task *healTask // Task which this request is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}

// accountTask represents the sync task for a chunk of the account snapshot.
type accountTask struct {
	// These fields get serialized to leveldb on shutdown
	Next     common.Hash                    // Next account to sync in this interval
	Last     common.Hash                    // Last account to sync in this interval
	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts

	// These fields are internals used during runtime
	req  *accountRequest  // Pending request to fill this task
	res  *accountResponse // Validate response filling this task
	pend int              // Number of pending subtasks for this round

	needCode  []bool // Flags whether the filling accounts need code retrieval
	needState []bool // Flags whether the filling accounts need storage retrieval
	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing

	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval

	genBatch zonddb.Batch    // Batch used by the node generator
	genTrie  *trie.StackTrie // Node generator from storage slots

	done bool // Flag whether the task can be removed
}

// storageTask represents the sync task for a chunk of the storage snapshot.
type storageTask struct {
	Next common.Hash // Next account to sync in this interval
	Last common.Hash // Last account to sync in this interval

	// These fields are internals used during runtime
	root common.Hash     // Storage root hash for this instance
	req  *storageRequest // Pending request to fill this task

	genBatch zonddb.Batch    // Batch used by the node generator
	genTrie  *trie.StackTrie // Node generator from storage slots

	done bool // Flag whether the task can be removed
}

// healTask represents the sync task for healing the snap-synced chunk boundaries.
type healTask struct {
	scheduler *trie.Sync // State trie sync scheduler defining the tasks

	trieTasks map[string]common.Hash   // Set of trie node tasks currently queued for retrieval, indexed by node path
	codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash
}
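// Both task maps are refilled on demand from the trie sync scheduler, which
// hands out trie node and bytecode work items together. This is the pattern
// the assign*HealTasks methods below use (illustrative excerpt, not a
// standalone helper in the original file):
//
//	paths, hashes, codes := healer.scheduler.Missing(count)
//	for i, path := range paths {
//		healer.trieTasks[path] = hashes[i]
//	}
//	for _, hash := range codes {
//		healer.codeTasks[hash] = struct{}{}
//	}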
// SyncProgress is a database entry to allow suspending and resuming a snapshot
// state sync. As opposed to full and fast sync, there is no way to restart a
// suspended snap sync without prior knowledge of the suspension point.
type SyncProgress struct {
	Tasks []*accountTask // The suspended account tasks (contract tasks within)

	// Status report during syncing phase
	AccountSynced  uint64             // Number of accounts downloaded
	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	BytecodeSynced uint64             // Number of bytecodes downloaded
	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	StorageSynced  uint64             // Number of storage slots downloaded
	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	// Status report during healing phase
	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	BytecodeHealSynced uint64             // Number of bytecodes downloaded
	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
}

// SyncPending is analogous to SyncProgress, but it's used to report on pending
// ephemeral sync progress that doesn't get persisted into the database.
type SyncPending struct {
	TrienodeHeal uint64 // Number of state trie nodes pending
	BytecodeHeal uint64 // Number of bytecodes pending
}

// SyncPeer abstracts out the methods required for a peer to be synced against
// with the goal of allowing the construction of mock peers without the full
// blown networking.
type SyncPeer interface {
	// ID retrieves the peer's unique identifier.
	ID() string

	// RequestAccountRange fetches a batch of accounts rooted in a specific account
	// trie, starting with the origin.
	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error

	// RequestStorageRanges fetches a batch of storage slots belonging to one or
	// more accounts. If slots from only one account are requested, an origin marker
	// may also be used to retrieve from there.
	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error

	// RequestByteCodes fetches a batch of bytecodes by hash.
	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error

	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
	// a specific state trie.
	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error

	// Log retrieves the peer's own contextual logger.
	Log() log.Logger
}
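// Since SyncPeer is a plain interface, a test double needs nothing beyond a
// struct with these six methods. A minimal sketch (type and field names are
// hypothetical, not part of the original file):
//
//	type mockSyncPeer struct{ id string }
//
//	func (p *mockSyncPeer) ID() string      { return p.id }
//	func (p *mockSyncPeer) Log() log.Logger { return log.New("peer", p.id) }
//
//	func (p *mockSyncPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
//		return nil // a real mock would deliver a canned, proof-backed response here
//	}
//	func (p *mockSyncPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
//		return nil
//	}
//	func (p *mockSyncPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
//		return nil
//	}
//	func (p *mockSyncPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
//		return nil
//	}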
// Syncer is an Ethereum account and storage trie syncer based on snapshots and
// the snap protocol. Its purpose is to download all the accounts and storage
// slots from remote peers and reassemble chunks of the state trie, on top of
// which a state sync can be run to fix any gaps / overlaps.
//
// Every network request has a variety of failure events:
//   - The peer disconnects after task assignment, failing to send the request
//   - The peer disconnects after sending the request, before delivering on it
//   - The peer remains connected, but does not deliver a response in time
//   - The peer delivers a stale response after a previous timeout
//   - The peer delivers a refusal to serve the requested state
type Syncer struct {
	db     zonddb.KeyValueStore // Database to store the trie nodes into (and dedup)
	scheme string               // Node scheme used in node database

	root    common.Hash    // Current state trie root being synced
	tasks   []*accountTask // Current account task set being synced
	snapped bool           // Flag to signal that snap phase is done
	healer  *healTask      // Current state healing task being executed
	update  chan struct{}  // Notification channel for possible sync progression

	peers    map[string]SyncPeer // Currently active peers to download from
	peerJoin *event.Feed         // Event feed to react to peers joining
	peerDrop *event.Feed         // Event feed to react to peers dropping
	rates    *msgrate.Trackers   // Message throughput rates for peers

	// Request tracking during syncing phase
	statelessPeers map[string]struct{} // Peers that failed to deliver state data
	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests

	accountReqs  map[uint64]*accountRequest  // Account requests currently running
	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
	storageReqs  map[uint64]*storageRequest  // Storage requests currently running

	accountSynced  uint64             // Number of accounts downloaded
	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	bytecodeSynced uint64             // Number of bytecodes downloaded
	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	storageSynced  uint64             // Number of storage slots downloaded
	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	extProgress *SyncProgress // Progress that can be exposed to external callers

	// Request tracking during healing phase
	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests

	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running

	trienodeHealRate      float64       // Average heal rate for processing trie node data
	trienodeHealPend      atomic.Uint64 // Number of trie nodes currently pending for processing
	trienodeHealThrottle  float64       // Divisor for throttling the amount of trienode heal data requested
	trienodeHealThrottled time.Time     // Timestamp the last time the throttle was updated

	trienodeHealSynced uint64             // Number of state trie nodes downloaded
	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	trienodeHealDups   uint64             // Number of state trie nodes already processed
	trienodeHealNops   uint64             // Number of state trie nodes not requested
	bytecodeHealSynced uint64             // Number of bytecodes downloaded
	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
	bytecodeHealDups   uint64             // Number of bytecodes already processed
	bytecodeHealNops   uint64             // Number of bytecodes not requested

	stateWriter        zonddb.Batch       // Shared batch writer used for persisting raw states
	accountHealed      uint64             // Number of accounts downloaded during the healing stage
	accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage
	storageHealed      uint64             // Number of storage slots downloaded during the healing stage
	storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage

	startTime time.Time // Time instance when snapshot sync started
	logTime   time.Time // Time instance when status was last reported

	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
}
// NewSyncer creates a new snapshot syncer to download the Ethereum state over the
// snap protocol.
func NewSyncer(db zonddb.KeyValueStore, scheme string) *Syncer {
	return &Syncer{
		db:     db,
		scheme: scheme,

		peers:    make(map[string]SyncPeer),
		peerJoin: new(event.Feed),
		peerDrop: new(event.Feed),
		rates:    msgrate.NewTrackers(log.New("proto", "snap")),
		update:   make(chan struct{}, 1),

		accountIdlers:  make(map[string]struct{}),
		storageIdlers:  make(map[string]struct{}),
		bytecodeIdlers: make(map[string]struct{}),

		accountReqs:  make(map[uint64]*accountRequest),
		storageReqs:  make(map[uint64]*storageRequest),
		bytecodeReqs: make(map[uint64]*bytecodeRequest),

		trienodeHealIdlers: make(map[string]struct{}),
		bytecodeHealIdlers: make(map[string]struct{}),

		trienodeHealReqs:     make(map[uint64]*trienodeHealRequest),
		bytecodeHealReqs:     make(map[uint64]*bytecodeHealRequest),
		trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk
		stateWriter:          db.NewBatch(),

		extProgress: new(SyncProgress),
	}
}
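// Typical wiring (illustrative, assuming a SyncPeer implementation and a
// key-value store; rawdb.HashScheme stands in for whichever node scheme the
// caller actually uses):
//
//	syncer := NewSyncer(db, rawdb.HashScheme)
//	if err := syncer.Register(peer); err != nil {
//		// peer was already registered
//	}
//	cancel := make(chan struct{}) // close to abort the cycle with ErrCancelled
//	if err := syncer.Sync(stateRoot, cancel); err != nil {
//		// handle failure / cancellation; progress is persisted for resumption
//	}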
// Register injects a new data source into the syncer's peerset.
func (s *Syncer) Register(peer SyncPeer) error {
	// Make sure the peer is not registered yet
	id := peer.ID()

	s.lock.Lock()
	if _, ok := s.peers[id]; ok {
		log.Error("Snap peer already registered", "id", id)

		s.lock.Unlock()
		return errors.New("already registered")
	}
	s.peers[id] = peer
	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))

	// Mark the peer as idle, even if no sync is running
	s.accountIdlers[id] = struct{}{}
	s.storageIdlers[id] = struct{}{}
	s.bytecodeIdlers[id] = struct{}{}
	s.trienodeHealIdlers[id] = struct{}{}
	s.bytecodeHealIdlers[id] = struct{}{}
	s.lock.Unlock()

	// Notify any active syncs that a new peer can be assigned data
	s.peerJoin.Send(id)
	return nil
}

// Unregister severs a data source from the syncer's peerset.
func (s *Syncer) Unregister(id string) error {
	// Remove all traces of the peer from the registry
	s.lock.Lock()
	if _, ok := s.peers[id]; !ok {
		log.Error("Snap peer not registered", "id", id)

		s.lock.Unlock()
		return errors.New("not registered")
	}
	delete(s.peers, id)
	s.rates.Untrack(id)

	// Remove status markers, even if no sync is running
	delete(s.statelessPeers, id)

	delete(s.accountIdlers, id)
	delete(s.storageIdlers, id)
	delete(s.bytecodeIdlers, id)
	delete(s.trienodeHealIdlers, id)
	delete(s.bytecodeHealIdlers, id)
	s.lock.Unlock()

	// Notify any active syncs that pending requests need to be reverted
	s.peerDrop.Send(id)
	return nil
}
// Sync starts (or resumes a previous) sync cycle to iterate over a state trie
// with the given root and reconstruct the nodes based on the snapshot leaves.
// Previously downloaded segments will not be redownloaded or fixed, rather any
// errors will be healed after the leaves are fully accumulated.
func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
	// Move the trie root from any previous value, revert stateless markers for
	// any peers and initialize the syncer if it was not yet run
	s.lock.Lock()
	s.root = root
	s.healer = &healTask{
		scheduler: state.NewStateSync(root, s.db, s.onHealState, s.scheme),
		trieTasks: make(map[string]common.Hash),
		codeTasks: make(map[common.Hash]struct{}),
	}
	s.statelessPeers = make(map[string]struct{})
	s.lock.Unlock()

	if s.startTime == (time.Time{}) {
		s.startTime = time.Now()
	}
	// Retrieve the previous sync status from LevelDB and abort if already synced
	s.loadSyncStatus()
	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
		log.Debug("Snapshot sync already completed")
		return nil
	}
	defer func() { // Persist any progress, independent of failure
		for _, task := range s.tasks {
			s.forwardAccountTask(task)
		}
		s.cleanAccountTasks()
		s.saveSyncStatus()
	}()

	log.Debug("Starting snapshot sync cycle", "root", root)

	// Flush out the last committed raw states
	defer func() {
		if s.stateWriter.ValueSize() > 0 {
			s.stateWriter.Write()
			s.stateWriter.Reset()
		}
	}()
	defer s.report(true)
	// commit any trie- and bytecode-healing data.
	defer s.commitHealer(true)

	// Whether sync completed or not, disregard any future packets
	defer func() {
		log.Debug("Terminating snapshot sync cycle", "root", root)
		s.lock.Lock()
		s.accountReqs = make(map[uint64]*accountRequest)
		s.storageReqs = make(map[uint64]*storageRequest)
		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
		s.lock.Unlock()
	}()
	// Keep scheduling sync tasks
	peerJoin := make(chan string, 16)
	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
	defer peerJoinSub.Unsubscribe()

	peerDrop := make(chan string, 16)
	peerDropSub := s.peerDrop.Subscribe(peerDrop)
	defer peerDropSub.Unsubscribe()

	// Create a set of unique channels for this sync cycle. We need these to be
	// ephemeral so a data race doesn't accidentally deliver something stale on
	// a persistent channel across syncs (yup, this happened)
	var (
		accountReqFails      = make(chan *accountRequest)
		storageReqFails      = make(chan *storageRequest)
		bytecodeReqFails     = make(chan *bytecodeRequest)
		accountResps         = make(chan *accountResponse)
		storageResps         = make(chan *storageResponse)
		bytecodeResps        = make(chan *bytecodeResponse)
		trienodeHealReqFails = make(chan *trienodeHealRequest)
		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
		trienodeHealResps    = make(chan *trienodeHealResponse)
		bytecodeHealResps    = make(chan *bytecodeHealResponse)
	)
	for {
		// Remove all completed tasks and terminate sync if everything's done
		s.cleanStorageTasks()
		s.cleanAccountTasks()
		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
			return nil
		}
		// Assign all the data retrieval tasks to any free peers
		s.assignAccountTasks(accountResps, accountReqFails, cancel)
		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
		s.assignStorageTasks(storageResps, storageReqFails, cancel)

		if len(s.tasks) == 0 {
			// Sync phase done, run heal phase
			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
		}
		// Update sync progress
		s.lock.Lock()
		s.extProgress = &SyncProgress{
			AccountSynced:      s.accountSynced,
			AccountBytes:       s.accountBytes,
			BytecodeSynced:     s.bytecodeSynced,
			BytecodeBytes:      s.bytecodeBytes,
			StorageSynced:      s.storageSynced,
			StorageBytes:       s.storageBytes,
			TrienodeHealSynced: s.trienodeHealSynced,
			TrienodeHealBytes:  s.trienodeHealBytes,
			BytecodeHealSynced: s.bytecodeHealSynced,
			BytecodeHealBytes:  s.bytecodeHealBytes,
		}
		s.lock.Unlock()
		// Wait for something to happen
		select {
		case <-s.update:
			// Something happened (new peer, delivery, timeout), recheck tasks
		case <-peerJoin:
			// A new peer joined, try to schedule it new tasks
		case id := <-peerDrop:
			s.revertRequests(id)
		case <-cancel:
			return ErrCancelled

		case req := <-accountReqFails:
			s.revertAccountRequest(req)
		case req := <-bytecodeReqFails:
			s.revertBytecodeRequest(req)
		case req := <-storageReqFails:
			s.revertStorageRequest(req)
		case req := <-trienodeHealReqFails:
			s.revertTrienodeHealRequest(req)
		case req := <-bytecodeHealReqFails:
			s.revertBytecodeHealRequest(req)

		case res := <-accountResps:
			s.processAccountResponse(res)
		case res := <-bytecodeResps:
			s.processBytecodeResponse(res)
		case res := <-storageResps:
			s.processStorageResponse(res)
		case res := <-trienodeHealResps:
			s.processTrienodeHealResponse(res)
		case res := <-bytecodeHealResps:
			s.processBytecodeHealResponse(res)
		}
		// Report stats if something meaningful happened
		s.report(false)
	}
}
// loadSyncStatus retrieves a previously aborted sync status from the database,
// or generates a fresh one if none is available.
func (s *Syncer) loadSyncStatus() {
	var progress SyncProgress

	if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil {
		if err := json.Unmarshal(status, &progress); err != nil {
			log.Error("Failed to decode snap sync status", "err", err)
		} else {
			for _, task := range progress.Tasks {
				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
			}
			s.tasks = progress.Tasks
			for _, task := range s.tasks {
				task := task // closure for task.genBatch in the stacktrie writer callback

				task.genBatch = zonddb.HookedBatch{
					Batch: s.db.NewBatch(),
					OnPut: func(key []byte, value []byte) {
						s.accountBytes += common.StorageSize(len(key) + len(value))
					},
				}
				task.genTrie = trie.NewStackTrie(func(owner common.Hash, path []byte, hash common.Hash, val []byte) {
					rawdb.WriteTrieNode(task.genBatch, owner, path, hash, val, s.scheme)
				})
				for accountHash, subtasks := range task.SubTasks {
					for _, subtask := range subtasks {
						subtask := subtask // closure for subtask.genBatch in the stacktrie writer callback

						subtask.genBatch = zonddb.HookedBatch{
							Batch: s.db.NewBatch(),
							OnPut: func(key []byte, value []byte) {
								s.storageBytes += common.StorageSize(len(key) + len(value))
							},
						}
						subtask.genTrie = trie.NewStackTrieWithOwner(func(owner common.Hash, path []byte, hash common.Hash, val []byte) {
							rawdb.WriteTrieNode(subtask.genBatch, owner, path, hash, val, s.scheme)
						}, accountHash)
					}
				}
			}
			s.lock.Lock()
			defer s.lock.Unlock()

			s.snapped = len(s.tasks) == 0

			s.accountSynced = progress.AccountSynced
			s.accountBytes = progress.AccountBytes
			s.bytecodeSynced = progress.BytecodeSynced
			s.bytecodeBytes = progress.BytecodeBytes
			s.storageSynced = progress.StorageSynced
			s.storageBytes = progress.StorageBytes

			s.trienodeHealSynced = progress.TrienodeHealSynced
			s.trienodeHealBytes = progress.TrienodeHealBytes
			s.bytecodeHealSynced = progress.BytecodeHealSynced
			s.bytecodeHealBytes = progress.BytecodeHealBytes
			return
		}
	}
	// Either we've failed to decode the previous state, or there was none.
	// Start a fresh sync by chunking up the account range and scheduling
	// them for retrieval.
	s.tasks = nil
	s.accountSynced, s.accountBytes = 0, 0
	s.bytecodeSynced, s.bytecodeBytes = 0, 0
	s.storageSynced, s.storageBytes = 0, 0
	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0

	var next common.Hash
	step := new(big.Int).Sub(
		new(big.Int).Div(
			new(big.Int).Exp(common.Big2, common.Big256, nil),
			big.NewInt(int64(accountConcurrency)),
		), common.Big1,
	)
	for i := 0; i < accountConcurrency; i++ {
		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
		if i == accountConcurrency-1 {
			// Make sure we don't overflow if the step is not a proper divisor
			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
		}
		batch := zonddb.HookedBatch{
			Batch: s.db.NewBatch(),
			OnPut: func(key []byte, value []byte) {
				s.accountBytes += common.StorageSize(len(key) + len(value))
			},
		}
		s.tasks = append(s.tasks, &accountTask{
			Next:     next,
			Last:     last,
			SubTasks: make(map[common.Hash][]*storageTask),
			genBatch: batch,
			genTrie: trie.NewStackTrie(func(owner common.Hash, path []byte, hash common.Hash, val []byte) {
				rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme)
			}),
		})
		log.Debug("Created account sync task", "from", next, "last", last)
		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
	}
}
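// With accountConcurrency = 16 the step works out to 2^256/16 - 1 = 2^252 - 1,
// so the fresh tasks split the hash space on nibble boundaries (worked
// example, first and last chunks shown):
//
//	task  0: Next 0x0000...0000, Last 0x0fff...ffff
//	task  1: Next 0x1000...0000, Last 0x1fff...ffff
//	...
//	task 15: Next 0xf000...0000, Last 0xffff...ffff (pinned explicitly to avoid overflow)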
// saveSyncStatus marshals the remaining sync tasks into leveldb.
func (s *Syncer) saveSyncStatus() {
	// Serialize any partial progress to disk before spinning down
	for _, task := range s.tasks {
		if err := task.genBatch.Write(); err != nil {
			log.Error("Failed to persist account slots", "err", err)
		}
		for _, subtasks := range task.SubTasks {
			for _, subtask := range subtasks {
				if err := subtask.genBatch.Write(); err != nil {
					log.Error("Failed to persist storage slots", "err", err)
				}
			}
		}
	}
	// Store the actual progress markers
	progress := &SyncProgress{
		Tasks:              s.tasks,
		AccountSynced:      s.accountSynced,
		AccountBytes:       s.accountBytes,
		BytecodeSynced:     s.bytecodeSynced,
		BytecodeBytes:      s.bytecodeBytes,
		StorageSynced:      s.storageSynced,
		StorageBytes:       s.storageBytes,
		TrienodeHealSynced: s.trienodeHealSynced,
		TrienodeHealBytes:  s.trienodeHealBytes,
		BytecodeHealSynced: s.bytecodeHealSynced,
		BytecodeHealBytes:  s.bytecodeHealBytes,
	}
	status, err := json.Marshal(progress)
	if err != nil {
		panic(err) // This can only fail during implementation
	}
	rawdb.WriteSnapshotSyncStatus(s.db, status)
}

// Progress returns the snap sync status statistics.
func (s *Syncer) Progress() (*SyncProgress, *SyncPending) {
	s.lock.Lock()
	defer s.lock.Unlock()
	pending := new(SyncPending)
	if s.healer != nil {
		pending.TrienodeHeal = uint64(len(s.healer.trieTasks))
		pending.BytecodeHeal = uint64(len(s.healer.codeTasks))
	}
	return s.extProgress, pending
}
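// Progress is safe to poll from a goroutine other than the sync runloop, so a
// caller can report status while Sync is executing. Illustrative polling loop
// (names outside this file are hypothetical):
//
//	go func() {
//		for range time.Tick(8 * time.Second) {
//			done, pending := syncer.Progress()
//			log.Info("Snap sync status",
//				"accounts", done.AccountSynced,
//				"slots", done.StorageSynced,
//				"healPending", pending.TrienodeHeal+pending.BytecodeHeal)
//		}
//	}()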
// cleanAccountTasks removes account range retrieval tasks that have already been
// completed.
func (s *Syncer) cleanAccountTasks() {
	// If the sync was already done before, don't even bother
	if len(s.tasks) == 0 {
		return
	}
	// Sync wasn't finished previously, check for any task that can be finalized
	for i := 0; i < len(s.tasks); i++ {
		if s.tasks[i].done {
			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
			i--
		}
	}
	// If everything was just finalized, generate the account trie and start heal
	if len(s.tasks) == 0 {
		s.lock.Lock()
		s.snapped = true
		s.lock.Unlock()

		// Push the final sync report
		s.reportSyncProgress(true)
	}
}

// cleanStorageTasks iterates over all the account tasks and storage sub-tasks
// within, cleaning any that have been completed.
func (s *Syncer) cleanStorageTasks() {
	for _, task := range s.tasks {
		for account, subtasks := range task.SubTasks {
			// Remove storage range retrieval tasks that completed
			for j := 0; j < len(subtasks); j++ {
				if subtasks[j].done {
					subtasks = append(subtasks[:j], subtasks[j+1:]...)
					j--
				}
			}
			if len(subtasks) > 0 {
				task.SubTasks[account] = subtasks
				continue
			}
			// If all storage chunks are done, mark the account as done too
			for j, hash := range task.res.hashes {
				if hash == account {
					task.needState[j] = false
				}
			}
			delete(task.SubTasks, account)
			task.pend--

			// If this was the last pending task, forward the account task
			if task.pend == 0 {
				s.forwardAccountTask(task)
			}
		}
	}
}
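// All five assign* methods below follow the same template: snapshot the idle
// peer set, sort it by estimated download capacity so the fastest peers are
// consumed first, then pop one idler per pending task. The sorting relies on
// the capacitySort helper defined elsewhere in this file; a minimal
// illustration of the idiom:
//
//	idlers := &capacitySort{
//		ids:  []string{"slow", "fast"},
//		caps: []int{1, 9},
//	}
//	sort.Sort(sort.Reverse(idlers)) // idlers.ids is now ["fast", "slow"]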
// assignAccountTasks attempts to match idle peers to pending account range
// retrievals.
func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.accountIdlers)),
		caps: make([]int, 0, len(s.accountIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.accountIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks already filling
		if task.req != nil || task.res != nil {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.accountReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		req := &accountRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			origin:  task.Next,
			limit:   task.Last,
			task:    task,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Account range request timed out", "reqid", reqid)
			s.rates.Update(idle, AccountRangeMsg, 0, 0)
			s.scheduleRevertAccountRequest(req)
		})
		s.accountReqs[reqid] = req
		delete(s.accountIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if cap > maxRequestSize {
				cap = maxRequestSize
			}
			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
				cap = minRequestSize
			}
			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
				peer.Log().Debug("Failed to request account range", "err", err)
				s.scheduleRevertAccountRequest(req)
			}
		}(s.root)

		// Inject the request into the task to block further assignments
		task.req = req
	}
}
// assignBytecodeTasks attempts to match idle peers to pending code retrievals.
func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.bytecodeIdlers)),
		caps: make([]int, 0, len(s.bytecodeIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.bytecodeIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks not in the bytecode retrieval phase
		if task.res == nil {
			continue
		}
		// Skip tasks that are already retrieving (or done with) all codes
		if len(task.codeTasks) == 0 {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.bytecodeReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxCodeRequestCount {
			cap = maxCodeRequestCount
		}
		hashes := make([]common.Hash, 0, cap)
		for hash := range task.codeTasks {
			delete(task.codeTasks, hash)
			hashes = append(hashes, hash)
			if len(hashes) >= cap {
				break
			}
		}
		req := &bytecodeRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			hashes:  hashes,
			task:    task,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
			s.rates.Update(idle, ByteCodesMsg, 0, 0)
			s.scheduleRevertBytecodeRequest(req)
		})
		s.bytecodeReqs[reqid] = req
		delete(s.bytecodeIdlers, idle)

		s.pend.Add(1)
		go func() {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
				log.Debug("Failed to request bytecodes", "err", err)
				s.scheduleRevertBytecodeRequest(req)
			}
		}()
	}
}
// assignStorageTasks attempts to match idle peers to pending storage range
// retrievals.
func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.storageIdlers)),
		caps: make([]int, 0, len(s.storageIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.storageIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks not in the storage retrieval phase
		if task.res == nil {
			continue
		}
		// Skip tasks that are already retrieving (or done with) all small states
		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.storageReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer. If there are
		// large contract tasks pending, complete those before diving into
		// even more new contracts.
		if cap > maxRequestSize {
			cap = maxRequestSize
		}
		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
			cap = minRequestSize
		}
		storageSets := cap / 1024

		var (
			accounts = make([]common.Hash, 0, storageSets)
			roots    = make([]common.Hash, 0, storageSets)
			subtask  *storageTask
		)
		for account, subtasks := range task.SubTasks {
			for _, st := range subtasks {
				// Skip any subtasks already filling
				if st.req != nil {
					continue
				}
				// Found an incomplete storage chunk, schedule it
				accounts = append(accounts, account)
				roots = append(roots, st.root)
				subtask = st
				break // Large contract chunks are downloaded individually
			}
			if subtask != nil {
				break // Large contract chunks are downloaded individually
			}
		}
		if subtask == nil {
			// No large contract requires retrieval, but small ones are available
			for account, root := range task.stateTasks {
				delete(task.stateTasks, account)

				accounts = append(accounts, account)
				roots = append(roots, root)

				if len(accounts) >= storageSets {
					break
				}
			}
		}
		// If nothing was found, it means this task is actually already fully
		// retrieving, but large contracts are hard to detect. Skip to the next.
		if len(accounts) == 0 {
			continue
		}
		req := &storageRequest{
			peer:     idle,
			id:       reqid,
			time:     time.Now(),
			deliver:  success,
			revert:   fail,
			cancel:   cancel,
			stale:    make(chan struct{}),
			accounts: accounts,
			roots:    roots,
			mainTask: task,
			subTask:  subtask,
		}
		if subtask != nil {
			req.origin = subtask.Next
			req.limit = subtask.Last
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Storage request timed out", "reqid", reqid)
			s.rates.Update(idle, StorageRangesMsg, 0, 0)
			s.scheduleRevertStorageRequest(req)
		})
		s.storageReqs[reqid] = req
		delete(s.storageIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			var origin, limit []byte
			if subtask != nil {
				origin, limit = req.origin[:], req.limit[:]
			}
			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
				log.Debug("Failed to request storage", "err", err)
				s.scheduleRevertStorageRequest(req)
			}
		}(s.root)

		// Inject the request into the subtask to block further assignments
		if subtask != nil {
			subtask.req = req
		}
	}
}
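// Worked example of the scheduling split above (numbers illustrative): a peer
// whose capacity estimate is 256 KiB yields storageSets = 256*1024/1024 = 256,
// so up to 256 small accounts get bundled into a single query, whereas one
// pending large-contract chunk preempts the bundle and is fetched alone with
// an explicit origin/limit window.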
// assignTrienodeHealTasks attempts to match idle peers to trie node requests to
// heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
		caps: make([]int, 0, len(s.trienodeHealIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.trienodeHealIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over pending tasks and try to find a peer to retrieve with
	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
		// If there are not enough trie tasks queued to fully assign, fill the
		// queue from the state sync scheduler. The trie sync schedules these
		// together with bytecodes, so we need to queue them combined.
		var (
			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
			want = maxTrieRequestCount + maxCodeRequestCount
		)
		if have < want {
			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
			for i, path := range paths {
				s.healer.trieTasks[path] = hashes[i]
			}
			for _, hash := range codes {
				s.healer.codeTasks[hash] = struct{}{}
			}
		}
		// If all the heal tasks are bytecodes or already downloading, bail
		if len(s.healer.trieTasks) == 0 {
			return
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.trienodeHealReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxTrieRequestCount {
			cap = maxTrieRequestCount
		}
		cap = int(float64(cap) / s.trienodeHealThrottle)
		if cap <= 0 {
			cap = 1
		}
		var (
			hashes   = make([]common.Hash, 0, cap)
			paths    = make([]string, 0, cap)
			pathsets = make([]TrieNodePathSet, 0, cap)
		)
		for path, hash := range s.healer.trieTasks {
			delete(s.healer.trieTasks, path)

			paths = append(paths, path)
			hashes = append(hashes, hash)
			if len(paths) >= cap {
				break
			}
		}
		// Group requests by account hash
		paths, hashes, _, pathsets = sortByAccountPath(paths, hashes)
		req := &trienodeHealRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			paths:   paths,
			hashes:  hashes,
			task:    s.healer,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
			s.rates.Update(idle, TrieNodesMsg, 0, 0)
			s.scheduleRevertTrienodeHealRequest(req)
		})
		s.trienodeHealReqs[reqid] = req
		delete(s.trienodeHealIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
				log.Debug("Failed to request trienode healers", "err", err)
				s.scheduleRevertTrienodeHealRequest(req)
			}
		}(s.root)
	}
}
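// Illustrative numbers for the throttled cap above: with maxTrieRequestCount =
// 512*1024/512 = 1024 and trienodeHealThrottle currently at 4, a peer whose
// measured capacity is 2048 is first clamped to 1024 and then throttled to
// 1024/4 = 256 trie nodes per request.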
// assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
// heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
		caps: make([]int, 0, len(s.bytecodeHealIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.bytecodeHealIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over pending tasks and try to find a peer to retrieve with
	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
		// If there are not enough trie tasks queued to fully assign, fill the
		// queue from the state sync scheduler. The trie sync schedules these
		// together with trie nodes, so we need to queue them combined.
		var (
			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
			want = maxTrieRequestCount + maxCodeRequestCount
		)
		if have < want {
			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
			for i, path := range paths {
				s.healer.trieTasks[path] = hashes[i]
			}
			for _, hash := range codes {
				s.healer.codeTasks[hash] = struct{}{}
			}
		}
		// If all the heal tasks are trienodes or already downloading, bail
		if len(s.healer.codeTasks) == 0 {
			return
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.bytecodeHealReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxCodeRequestCount {
			cap = maxCodeRequestCount
		}
		hashes := make([]common.Hash, 0, cap)
		for hash := range s.healer.codeTasks {
			delete(s.healer.codeTasks, hash)

			hashes = append(hashes, hash)
			if len(hashes) >= cap {
				break
			}
		}
		req := &bytecodeHealRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			hashes:  hashes,
			task:    s.healer,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
			s.rates.Update(idle, ByteCodesMsg, 0, 0)
			s.scheduleRevertBytecodeHealRequest(req)
		})
		s.bytecodeHealReqs[reqid] = req
		delete(s.bytecodeHealIdlers, idle)

		s.pend.Add(1)
		go func() {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
				log.Debug("Failed to request bytecode healers", "err", err)
				s.scheduleRevertBytecodeHealRequest(req)
			}
		}()
	}
}
1530 func (s *Syncer) revertRequests(peer string) { 1531 // Gather the requests first, revertals need the lock too 1532 s.lock.Lock() 1533 var accountReqs []*accountRequest 1534 for _, req := range s.accountReqs { 1535 if req.peer == peer { 1536 accountReqs = append(accountReqs, req) 1537 } 1538 } 1539 var bytecodeReqs []*bytecodeRequest 1540 for _, req := range s.bytecodeReqs { 1541 if req.peer == peer { 1542 bytecodeReqs = append(bytecodeReqs, req) 1543 } 1544 } 1545 var storageReqs []*storageRequest 1546 for _, req := range s.storageReqs { 1547 if req.peer == peer { 1548 storageReqs = append(storageReqs, req) 1549 } 1550 } 1551 var trienodeHealReqs []*trienodeHealRequest 1552 for _, req := range s.trienodeHealReqs { 1553 if req.peer == peer { 1554 trienodeHealReqs = append(trienodeHealReqs, req) 1555 } 1556 } 1557 var bytecodeHealReqs []*bytecodeHealRequest 1558 for _, req := range s.bytecodeHealReqs { 1559 if req.peer == peer { 1560 bytecodeHealReqs = append(bytecodeHealReqs, req) 1561 } 1562 } 1563 s.lock.Unlock() 1564 1565 // Revert all the requests matching the peer 1566 for _, req := range accountReqs { 1567 s.revertAccountRequest(req) 1568 } 1569 for _, req := range bytecodeReqs { 1570 s.revertBytecodeRequest(req) 1571 } 1572 for _, req := range storageReqs { 1573 s.revertStorageRequest(req) 1574 } 1575 for _, req := range trienodeHealReqs { 1576 s.revertTrienodeHealRequest(req) 1577 } 1578 for _, req := range bytecodeHealReqs { 1579 s.revertBytecodeHealRequest(req) 1580 } 1581 } 1582 1583 // scheduleRevertAccountRequest asks the event loop to clean up an account range 1584 // request and return all failed retrieval tasks to the scheduler for reassignment. 1585 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { 1586 select { 1587 case req.revert <- req: 1588 // Sync event loop notified 1589 case <-req.cancel: 1590 // Sync cycle got cancelled 1591 case <-req.stale: 1592 // Request already reverted 1593 } 1594 } 1595 1596 // revertAccountRequest cleans up an account range request and returns all failed 1597 // retrieval tasks to the scheduler for reassignment. 1598 // 1599 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1600 // On peer threads, use scheduleRevertAccountRequest. 1601 func (s *Syncer) revertAccountRequest(req *accountRequest) { 1602 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) 1603 select { 1604 case <-req.stale: 1605 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id) 1606 return 1607 default: 1608 } 1609 close(req.stale) 1610 1611 // Remove the request from the tracked set 1612 s.lock.Lock() 1613 delete(s.accountReqs, req.id) 1614 s.lock.Unlock() 1615 1616 // If there's a timeout timer still running, abort it and mark the account 1617 // task as not-pending, ready for rescheduling 1618 req.timeout.Stop() 1619 if req.task.req == req { 1620 req.task.req = nil 1621 } 1622 } 1623 1624 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request 1625 // and return all failed retrieval tasks to the scheduler for reassignment. 
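// Editorial sketch: every scheduleRevertXRequest/revertXRequest pair above and
// below follows one pattern. Peer threads hand the request back to the runloop
// over the revert channel, and the runloop closes the stale channel exactly
// once, which is what makes reverts idempotent. With a hypothetical xRequest
// type (only the coordination fields shown):
type xRequest struct {
	revert  chan *xRequest
	cancel  chan struct{}
	stale   chan struct{}
	timeout *time.Timer
}

func scheduleRevertX(req *xRequest) {
	select {
	case req.revert <- req: // sync event loop notified
	case <-req.cancel: // sync cycle got cancelled
	case <-req.stale: // request already reverted
	}
}

func revertX(req *xRequest) {
	select {
	case <-req.stale:
		return // already reverted, closing twice would panic
	default:
	}
	close(req.stale) // signal staleness exactly once
	req.timeout.Stop()
	// ...return the request's tasks to the scheduler for reassignment...
}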
1626 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1627 select { 1628 case req.revert <- req: 1629 // Sync event loop notified 1630 case <-req.cancel: 1631 // Sync cycle got cancelled 1632 case <-req.stale: 1633 // Request already reverted 1634 } 1635 } 1636 1637 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1638 // retrieval tasks to the scheduler for reassignment. 1639 // 1640 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1641 // On peer threads, use scheduleRevertBytecodeRequest. 1642 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1643 log.Debug("Reverting bytecode request", "peer", req.peer) 1644 select { 1645 case <-req.stale: 1646 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1647 return 1648 default: 1649 } 1650 close(req.stale) 1651 1652 // Remove the request from the tracked set 1653 s.lock.Lock() 1654 delete(s.bytecodeReqs, req.id) 1655 s.lock.Unlock() 1656 1657 // If there's a timeout timer still running, abort it and mark the code 1658 // retrievals as not-pending, ready for rescheduling 1659 req.timeout.Stop() 1660 for _, hash := range req.hashes { 1661 req.task.codeTasks[hash] = struct{}{} 1662 } 1663 } 1664 1665 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1666 // request and return all failed retrieval tasks to the scheduler for reassignment. 1667 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1668 select { 1669 case req.revert <- req: 1670 // Sync event loop notified 1671 case <-req.cancel: 1672 // Sync cycle got cancelled 1673 case <-req.stale: 1674 // Request already reverted 1675 } 1676 } 1677 1678 // revertStorageRequest cleans up a storage range request and returns all failed 1679 // retrieval tasks to the scheduler for reassignment. 1680 // 1681 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1682 // On peer threads, use scheduleRevertStorageRequest. 1683 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1684 log.Debug("Reverting storage request", "peer", req.peer) 1685 select { 1686 case <-req.stale: 1687 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1688 return 1689 default: 1690 } 1691 close(req.stale) 1692 1693 // Remove the request from the tracked set 1694 s.lock.Lock() 1695 delete(s.storageReqs, req.id) 1696 s.lock.Unlock() 1697 1698 // If there's a timeout timer still running, abort it and mark the storage 1699 // task as not-pending, ready for rescheduling 1700 req.timeout.Stop() 1701 if req.subTask != nil { 1702 req.subTask.req = nil 1703 } else { 1704 for i, account := range req.accounts { 1705 req.mainTask.stateTasks[account] = req.roots[i] 1706 } 1707 } 1708 } 1709 1710 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1711 // request and return all failed retrieval tasks to the scheduler for reassignment. 1712 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1713 select { 1714 case req.revert <- req: 1715 // Sync event loop notified 1716 case <-req.cancel: 1717 // Sync cycle got cancelled 1718 case <-req.stale: 1719 // Request already reverted 1720 } 1721 } 1722 1723 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1724 // failed retrieval tasks to the scheduler for reassignment. 1725 // 1726 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 
1727 // On peer threads, use scheduleRevertTrienodeHealRequest. 1728 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { 1729 log.Debug("Reverting trienode heal request", "peer", req.peer) 1730 select { 1731 case <-req.stale: 1732 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) 1733 return 1734 default: 1735 } 1736 close(req.stale) 1737 1738 // Remove the request from the tracked set 1739 s.lock.Lock() 1740 delete(s.trienodeHealReqs, req.id) 1741 s.lock.Unlock() 1742 1743 // If there's a timeout timer still running, abort it and mark the trie node 1744 // retrievals as not-pending, ready for rescheduling 1745 req.timeout.Stop() 1746 for i, path := range req.paths { 1747 req.task.trieTasks[path] = req.hashes[i] 1748 } 1749 } 1750 1751 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal 1752 // request and return all failed retrieval tasks to the scheduler for reassignment. 1753 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { 1754 select { 1755 case req.revert <- req: 1756 // Sync event loop notified 1757 case <-req.cancel: 1758 // Sync cycle got cancelled 1759 case <-req.stale: 1760 // Request already reverted 1761 } 1762 } 1763 1764 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all 1765 // failed retrieval tasks to the scheduler for reassignment. 1766 // 1767 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1768 // On peer threads, use scheduleRevertBytecodeHealRequest. 1769 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { 1770 log.Debug("Reverting bytecode heal request", "peer", req.peer) 1771 select { 1772 case <-req.stale: 1773 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) 1774 return 1775 default: 1776 } 1777 close(req.stale) 1778 1779 // Remove the request from the tracked set 1780 s.lock.Lock() 1781 delete(s.bytecodeHealReqs, req.id) 1782 s.lock.Unlock() 1783 1784 // If there's a timeout timer still running, abort it and mark the code 1785 // retrievals as not-pending, ready for rescheduling 1786 req.timeout.Stop() 1787 for _, hash := range req.hashes { 1788 req.task.codeTasks[hash] = struct{}{} 1789 } 1790 } 1791 1792 // processAccountResponse integrates an already validated account range response 1793 // into the account tasks. 1794 func (s *Syncer) processAccountResponse(res *accountResponse) { 1795 // Switch the task from pending to filling 1796 res.task.req = nil 1797 res.task.res = res 1798 1799 // Ensure that the response doesn't overflow into the subsequent task 1800 last := res.task.Last.Big() 1801 for i, hash := range res.hashes { 1802 // Mark the range complete if the last is already included. 1803 // Keep iterating to delete any extra states beyond it. 1804 cmp := hash.Big().Cmp(last) 1805 if cmp == 0 { 1806 res.cont = false 1807 continue 1808 } 1809 if cmp > 0 { 1810 // Chunk overflown, cut off excess 1811 res.hashes = res.hashes[:i] 1812 res.accounts = res.accounts[:i] 1813 res.cont = false // Mark range completed 1814 break 1815 } 1816 } 1817 // Iterate over all the accounts and assemble which ones need further sub- 1818 // filling before the entire account range can be persisted.
1819 res.task.needCode = make([]bool, len(res.accounts)) 1820 res.task.needState = make([]bool, len(res.accounts)) 1821 res.task.needHeal = make([]bool, len(res.accounts)) 1822 1823 res.task.codeTasks = make(map[common.Hash]struct{}) 1824 res.task.stateTasks = make(map[common.Hash]common.Hash) 1825 1826 resumed := make(map[common.Hash]struct{}) 1827 1828 res.task.pend = 0 1829 for i, account := range res.accounts { 1830 // Check if the account is a contract with an unknown code 1831 if !bytes.Equal(account.CodeHash, types.EmptyCodeHash.Bytes()) { 1832 if !rawdb.HasCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)) { 1833 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} 1834 res.task.needCode[i] = true 1835 res.task.pend++ 1836 } 1837 } 1838 // Check if the account is a contract with an unknown storage trie 1839 if account.Root != types.EmptyRootHash { 1840 if !rawdb.HasTrieNode(s.db, res.hashes[i], nil, account.Root, s.scheme) { 1841 // If there was a previous large state retrieval in progress, 1842 // don't restart it from scratch. This happens if a sync cycle 1843 // is interrupted and resumed later. However, *do* update the 1844 // previous root hash. 1845 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok { 1846 log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root) 1847 for _, subtask := range subtasks { 1848 subtask.root = account.Root 1849 } 1850 res.task.needHeal[i] = true 1851 resumed[res.hashes[i]] = struct{}{} 1852 } else { 1853 res.task.stateTasks[res.hashes[i]] = account.Root 1854 } 1855 res.task.needState[i] = true 1856 res.task.pend++ 1857 } 1858 } 1859 } 1860 // Delete any subtasks that have been aborted but not resumed. This may undo 1861 // some progress if a new peer gives us fewer accounts than an old one, but for 1862 // now we have to live with that. 1863 for hash := range res.task.SubTasks { 1864 if _, ok := resumed[hash]; !ok { 1865 log.Debug("Aborting suspended storage retrieval", "account", hash) 1866 delete(res.task.SubTasks, hash) 1867 } 1868 } 1869 // If the account range contained no contracts, or all have been fully filled 1870 // beforehand, short circuit storage filling and forward to the next task 1871 if res.task.pend == 0 { 1872 s.forwardAccountTask(res.task) 1873 return 1874 } 1875 // Some accounts are incomplete, leave as is for the storage and contract 1876 // task assigners to pick up and fill. 1877 } 1878 1879 // processBytecodeResponse integrates an already validated bytecode response 1880 // into the account tasks.
1881 func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { 1882 batch := s.db.NewBatch() 1883 1884 var ( 1885 codes uint64 1886 ) 1887 for i, hash := range res.hashes { 1888 code := res.codes[i] 1889 1890 // If the bytecode was not delivered, reschedule it 1891 if code == nil { 1892 res.task.codeTasks[hash] = struct{}{} 1893 continue 1894 } 1895 // Code was delivered, mark it not needed any more 1896 for j, account := range res.task.res.accounts { 1897 if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) { 1898 res.task.needCode[j] = false 1899 res.task.pend-- 1900 } 1901 } 1902 // Push the bytecode into a database batch 1903 codes++ 1904 rawdb.WriteCode(batch, hash, code) 1905 } 1906 bytes := common.StorageSize(batch.ValueSize()) 1907 if err := batch.Write(); err != nil { 1908 log.Crit("Failed to persist bytecodes", "err", err) 1909 } 1910 s.bytecodeSynced += codes 1911 s.bytecodeBytes += bytes 1912 1913 log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes) 1914 1915 // If this delivery completed the last pending task, forward the account task 1916 // to the next chunk 1917 if res.task.pend == 0 { 1918 s.forwardAccountTask(res.task) 1919 return 1920 } 1921 // Some accounts are still incomplete, leave as is for the storage and contract 1922 // task assigners to pick up and fill. 1923 } 1924 1925 // processStorageResponse integrates an already validated storage response 1926 // into the account tasks. 1927 func (s *Syncer) processStorageResponse(res *storageResponse) { 1928 // Switch the subtask from pending to idle 1929 if res.subTask != nil { 1930 res.subTask.req = nil 1931 } 1932 batch := zonddb.HookedBatch{ 1933 Batch: s.db.NewBatch(), 1934 OnPut: func(key []byte, value []byte) { 1935 s.storageBytes += common.StorageSize(len(key) + len(value)) 1936 }, 1937 } 1938 var ( 1939 slots int 1940 oldStorageBytes = s.storageBytes 1941 ) 1942 // Iterate over all the accounts and reconstruct their storage tries from the 1943 // delivered slots 1944 for i, account := range res.accounts { 1945 // If the account was not delivered, reschedule it 1946 if i >= len(res.hashes) { 1947 res.mainTask.stateTasks[account] = res.roots[i] 1948 continue 1949 } 1950 // State was delivered, if complete mark as not needed any more, otherwise 1951 // mark the account as needing healing 1952 for j, hash := range res.mainTask.res.hashes { 1953 if account != hash { 1954 continue 1955 } 1956 acc := res.mainTask.res.accounts[j] 1957 1958 // If the packet contains multiple contract storage slots, all 1959 // but the last are surely complete. The last contract may be 1960 // chunked, so check its continuation flag. 1961 if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { 1962 res.mainTask.needState[j] = false 1963 res.mainTask.pend-- 1964 } 1965 // If the last contract was chunked, mark it as needing healing 1966 // to avoid writing it out to disk prematurely.
1967 if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { 1968 res.mainTask.needHeal[j] = true 1969 } 1970 // If the last contract was chunked, we need to switch to large 1971 // contract handling mode 1972 if res.subTask == nil && i == len(res.hashes)-1 && res.cont { 1973 // If we haven't yet started a large-contract retrieval, create 1974 // the subtasks for it within the main account task 1975 if tasks, ok := res.mainTask.SubTasks[account]; !ok { 1976 var ( 1977 keys = res.hashes[i] 1978 chunks = uint64(storageConcurrency) 1979 lastKey common.Hash 1980 ) 1981 if len(keys) > 0 { 1982 lastKey = keys[len(keys)-1] 1983 } 1984 // If the number of slots remaining is low, decrease the 1985 // number of chunks. Somewhere on the order of 10-15K slots 1986 // fit into a packet of 500KB. A key/slot pair is maximum 64 1987 // bytes, so pessimistically maxRequestSize/64 = 8K. 1988 // 1989 // Chunk so that at least 2 packets are needed to fill a task. 1990 if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil { 1991 if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks { 1992 chunks = n + 1 1993 } 1994 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks) 1995 } else { 1996 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks) 1997 } 1998 r := newHashRange(lastKey, chunks) 1999 2000 // Our first task is the one that was just filled by this response. 2001 batch := zonddb.HookedBatch{ 2002 Batch: s.db.NewBatch(), 2003 OnPut: func(key []byte, value []byte) { 2004 s.storageBytes += common.StorageSize(len(key) + len(value)) 2005 }, 2006 } 2007 tasks = append(tasks, &storageTask{ 2008 Next: common.Hash{}, 2009 Last: r.End(), 2010 root: acc.Root, 2011 genBatch: batch, 2012 genTrie: trie.NewStackTrieWithOwner(func(owner common.Hash, path []byte, hash common.Hash, val []byte) { 2013 rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme) 2014 }, account), 2015 }) 2016 for r.Next() { 2017 batch := zonddb.HookedBatch{ 2018 Batch: s.db.NewBatch(), 2019 OnPut: func(key []byte, value []byte) { 2020 s.storageBytes += common.StorageSize(len(key) + len(value)) 2021 }, 2022 } 2023 tasks = append(tasks, &storageTask{ 2024 Next: r.Start(), 2025 Last: r.End(), 2026 root: acc.Root, 2027 genBatch: batch, 2028 genTrie: trie.NewStackTrieWithOwner(func(owner common.Hash, path []byte, hash common.Hash, val []byte) { 2029 rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme) 2030 }, account), 2031 }) 2032 } 2033 for _, task := range tasks { 2034 log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", task.Next, "last", task.Last) 2035 } 2036 res.mainTask.SubTasks[account] = tasks 2037 2038 // Since we've just created the sub-tasks, this response 2039 // is surely for the first one (zero origin) 2040 res.subTask = tasks[0] 2041 } 2042 } 2043 // If we're in large contract delivery mode, forward the subtask 2044 if res.subTask != nil { 2045 // Ensure the response doesn't overflow into the subsequent task 2046 last := res.subTask.Last.Big() 2047 // Find the first overflowing key. 
While at it, mark res as complete 2048 // if we find the range to include or pass the 'last' 2049 index := sort.Search(len(res.hashes[i]), func(k int) bool { 2050 cmp := res.hashes[i][k].Big().Cmp(last) 2051 if cmp >= 0 { 2052 res.cont = false 2053 } 2054 return cmp > 0 2055 }) 2056 if index >= 0 { 2057 // cut off excess 2058 res.hashes[i] = res.hashes[i][:index] 2059 res.slots[i] = res.slots[i][:index] 2060 } 2061 // Forward the relevant storage chunk (even if created just now) 2062 if res.cont { 2063 res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1]) 2064 } else { 2065 res.subTask.done = true 2066 } 2067 } 2068 } 2069 // Iterate over all the complete contracts, reconstruct the trie nodes and 2070 // push them to disk. If the contract is chunked, the trie nodes will be 2071 // reconstructed later. 2072 slots += len(res.hashes[i]) 2073 2074 if i < len(res.hashes)-1 || res.subTask == nil { 2075 tr := trie.NewStackTrieWithOwner(func(owner common.Hash, path []byte, hash common.Hash, val []byte) { 2076 rawdb.WriteTrieNode(batch, owner, path, hash, val, s.scheme) 2077 }, account) 2078 for j := 0; j < len(res.hashes[i]); j++ { 2079 tr.Update(res.hashes[i][j][:], res.slots[i][j]) 2080 } 2081 tr.Commit() 2082 } 2083 // Persist the received storage segments. This flat state may be 2084 // outdated during the sync, but it can be fixed later during the 2085 // snapshot generation. 2086 for j := 0; j < len(res.hashes[i]); j++ { 2087 rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j]) 2088 2089 // If we're storing large contracts, generate the trie nodes 2090 // on the fly to not trash the gluing points 2091 if i == len(res.hashes)-1 && res.subTask != nil { 2092 res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j]) 2093 } 2094 } 2095 } 2096 // Large contracts could have generated new trie nodes, flush them to disk 2097 if res.subTask != nil { 2098 if res.subTask.done { 2099 if root, err := res.subTask.genTrie.Commit(); err != nil { 2100 log.Error("Failed to commit stack slots", "err", err) 2101 } else if root == res.subTask.root { 2102 // If the chunk's root is an overflown but full delivery, clear the heal request 2103 for i, account := range res.mainTask.res.hashes { 2104 if account == res.accounts[len(res.accounts)-1] { 2105 res.mainTask.needHeal[i] = false 2106 } 2107 } 2108 } 2109 } 2110 if res.subTask.genBatch.ValueSize() > zonddb.IdealBatchSize || res.subTask.done { 2111 if err := res.subTask.genBatch.Write(); err != nil { 2112 log.Error("Failed to persist stack slots", "err", err) 2113 } 2114 res.subTask.genBatch.Reset() 2115 } 2116 } 2117 // Flush anything written just now and update the stats 2118 if err := batch.Write(); err != nil { 2119 log.Crit("Failed to persist storage slots", "err", err) 2120 } 2121 s.storageSynced += uint64(slots) 2122 2123 log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes) 2124 2125 // If this delivery completed the last pending task, forward the account task 2126 // to the next chunk 2127 if res.mainTask.pend == 0 { 2128 s.forwardAccountTask(res.mainTask) 2129 return 2130 } 2131 // Some accounts are still incomplete, leave as is for the storage and contract 2132 // task assigners to pick up and fill. 2133 } 2134 2135 // processTrienodeHealResponse integrates an already validated trienode response 2136 // into the healer tasks.
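// Editorial sketch: the chunk-count heuristic in processStorageResponse above,
// in isolation. A ~512KiB response pessimistically holds maxRequestSize/64 = 8K
// key/slot pairs, and a subtask should take at least two packets to fill.
// chunksFor is a hypothetical helper, not used by this file.
func chunksFor(estimatedSlots uint64) uint64 {
	const slotsPerPacket = maxRequestSize / 64 // pessimistic slots per response
	chunks := uint64(storageConcurrency)       // default upper bound: 16
	if n := estimatedSlots / (2 * slotsPerPacket); n+1 < chunks {
		chunks = n + 1 // e.g. 20000 slots remaining -> 2 chunks instead of 16
	}
	return chunks
}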
2137 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { 2138 var ( 2139 start = time.Now() 2140 fills int 2141 ) 2142 for i, hash := range res.hashes { 2143 node := res.nodes[i] 2144 2145 // If the trie node was not delivered, reschedule it 2146 if node == nil { 2147 res.task.trieTasks[res.paths[i]] = res.hashes[i] 2148 continue 2149 } 2150 fills++ 2151 2152 // Push the trie node into the state syncer 2153 s.trienodeHealSynced++ 2154 s.trienodeHealBytes += common.StorageSize(len(node)) 2155 2156 err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node}) 2157 switch err { 2158 case nil: 2159 case trie.ErrAlreadyProcessed: 2160 s.trienodeHealDups++ 2161 case trie.ErrNotRequested: 2162 s.trienodeHealNops++ 2163 default: 2164 log.Error("Invalid trienode processed", "hash", hash, "err", err) 2165 } 2166 } 2167 s.commitHealer(false) 2168 2169 // Calculate the processing rate of one filled trie node 2170 rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second)) 2171 2172 // Update the currently measured trienode queueing and processing throughput. 2173 // 2174 // The processing rate needs to be updated uniformly, regardless of whether 2175 // we've processed 1x100 trie nodes or 100x1, to keep the rate consistent 2176 // even in the face of varying network packets. As such, we cannot just 2177 // measure the time it took to process N trie nodes and update once; we 2178 // need one update per trie node. 2179 // 2180 // Naively, that would be: 2181 // 2182 // for i:=0; i<fills; i++ { 2183 // healRate = (1-measurementImpact)*oldRate + measurementImpact*newRate 2184 // } 2185 // 2186 // Essentially, a recursive expansion of HR = (1-MI)*HR + MI*NR. 2187 // 2188 // We can expand that formula for the Nth item as: 2189 // HR(N) = (1-MI)^N*OR + (1-MI)^(N-1)*MI*NR + (1-MI)^(N-2)*MI*NR + ...
+ (1-MI)^0*MI*NR 2190 // 2191 // The above is a geometric sequence that can be summed to: 2192 // HR(N) = (1-MI)^N*(OR-NR) + NR 2193 s.trienodeHealRate = gomath.Pow(1-trienodeHealRateMeasurementImpact, float64(fills))*(s.trienodeHealRate-rate) + rate 2194 2195 pending := s.trienodeHealPend.Load() 2196 if time.Since(s.trienodeHealThrottled) > time.Second { 2197 // Periodically adjust the trie node throttler 2198 if float64(pending) > 2*s.trienodeHealRate { 2199 s.trienodeHealThrottle *= trienodeHealThrottleIncrease 2200 } else { 2201 s.trienodeHealThrottle /= trienodeHealThrottleDecrease 2202 } 2203 if s.trienodeHealThrottle > maxTrienodeHealThrottle { 2204 s.trienodeHealThrottle = maxTrienodeHealThrottle 2205 } else if s.trienodeHealThrottle < minTrienodeHealThrottle { 2206 s.trienodeHealThrottle = minTrienodeHealThrottle 2207 } 2208 s.trienodeHealThrottled = time.Now() 2209 2210 log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle) 2211 } 2212 } 2213 2214 func (s *Syncer) commitHealer(force bool) { 2215 if !force && s.healer.scheduler.MemSize() < zonddb.IdealBatchSize { 2216 return 2217 } 2218 batch := s.db.NewBatch() 2219 if err := s.healer.scheduler.Commit(batch); err != nil { 2220 log.Error("Failed to commit healing data", "err", err) 2221 } 2222 if err := batch.Write(); err != nil { 2223 log.Crit("Failed to persist healing data", "err", err) 2224 } 2225 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) 2226 } 2227 2228 // processBytecodeHealResponse integrates an already validated bytecode response 2229 // into the healer tasks. 2230 func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { 2231 for i, hash := range res.hashes { 2232 node := res.codes[i] 2233 2234 // If the bytecode was not delivered, reschedule it 2235 if node == nil { 2236 res.task.codeTasks[hash] = struct{}{} 2237 continue 2238 } 2239 // Push the bytecode into the state syncer 2240 s.bytecodeHealSynced++ 2241 s.bytecodeHealBytes += common.StorageSize(len(node)) 2242 2243 err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node}) 2244 switch err { 2245 case nil: 2246 case trie.ErrAlreadyProcessed: 2247 s.bytecodeHealDups++ 2248 case trie.ErrNotRequested: 2249 s.bytecodeHealNops++ 2250 default: 2251 log.Error("Invalid bytecode processed", "hash", hash, "err", err) 2252 } 2253 } 2254 s.commitHealer(false) 2255 } 2256 2257 // forwardAccountTask takes a filled account task and persists anything available 2258 // into the database, after which it forwards the next account marker so that the 2259 // task's next chunk may be filled. 2260 func (s *Syncer) forwardAccountTask(task *accountTask) { 2261 // Remove any pending delivery 2262 res := task.res 2263 if res == nil { 2264 return // nothing to forward 2265 } 2266 task.res = nil 2267 2268 // Persist the received account segments. This flat state may be 2269 // outdated during the sync, but it can be fixed later during the 2270 // snapshot generation.
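	// Editorial aside on the heal-rate update in processTrienodeHealResponse
	// above: the closed form HR(N) = (1-MI)^N*(OR-NR) + NR matches applying
	// the naive per-node update N times. For example, with MI = 0.005,
	// OR = 1200, NR = 400 and N = 37 fills, both give
	//   0.995^37*(1200-400) + 400 ≈ 0.831*800 + 400 ≈ 1064.6
	// so a single gomath.Pow call replaces a loop of N float updates.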
2271 oldAccountBytes := s.accountBytes 2272 2273 batch := zonddb.HookedBatch{ 2274 Batch: s.db.NewBatch(), 2275 OnPut: func(key []byte, value []byte) { 2276 s.accountBytes += common.StorageSize(len(key) + len(value)) 2277 }, 2278 } 2279 for i, hash := range res.hashes { 2280 if task.needCode[i] || task.needState[i] { 2281 break 2282 } 2283 slim := types.SlimAccountRLP(*res.accounts[i]) 2284 rawdb.WriteAccountSnapshot(batch, hash, slim) 2285 2286 // If the task is complete, drop it into the stack trie to generate 2287 // account trie nodes for it 2288 if !task.needHeal[i] { 2289 full, err := types.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted 2290 if err != nil { 2291 panic(err) // Really shouldn't ever happen 2292 } 2293 task.genTrie.Update(hash[:], full) 2294 } 2295 } 2296 // Flush anything written just now and update the stats 2297 if err := batch.Write(); err != nil { 2298 log.Crit("Failed to persist accounts", "err", err) 2299 } 2300 s.accountSynced += uint64(len(res.accounts)) 2301 2302 // Task filling persisted, push the chunk marker forward to the first 2303 // account still missing data. 2304 for i, hash := range res.hashes { 2305 if task.needCode[i] || task.needState[i] { 2306 return 2307 } 2308 task.Next = incHash(hash) 2309 } 2310 // All accounts marked as complete, track if the entire task is done 2311 task.done = !res.cont 2312 2313 // Stack trie could have generated trie nodes, push them to disk (we need to 2314 // flush after finalizing task.done). It's fine even if we crash and lose this 2315 // write, as it will only cause more data to be downloaded during heal. 2316 if task.done { 2317 if _, err := task.genTrie.Commit(); err != nil { 2318 log.Error("Failed to commit stack account", "err", err) 2319 } 2320 } 2321 if task.genBatch.ValueSize() > zonddb.IdealBatchSize || task.done { 2322 if err := task.genBatch.Write(); err != nil { 2323 log.Error("Failed to persist stack account", "err", err) 2324 } 2325 task.genBatch.Reset() 2326 } 2327 log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes) 2328 } 2329 2330 // OnAccounts is a callback method to invoke when a range of accounts are 2331 // received from a remote peer. 2332 func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { 2333 size := common.StorageSize(len(hashes) * common.HashLength) 2334 for _, account := range accounts { 2335 size += common.StorageSize(len(account)) 2336 } 2337 for _, node := range proof { 2338 size += common.StorageSize(len(node)) 2339 } 2340 logger := peer.Log().New("reqid", id) 2341 logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) 2342 2343 // Whether or not the response is valid, we can mark the peer as idle and 2344 // notify the scheduler to assign a new task. If the response is invalid, 2345 // we'll drop the peer in a bit.
2346 defer func() { 2347 s.lock.Lock() 2348 defer s.lock.Unlock() 2349 if _, ok := s.peers[peer.ID()]; ok { 2350 s.accountIdlers[peer.ID()] = struct{}{} 2351 } 2352 select { 2353 case s.update <- struct{}{}: 2354 default: 2355 } 2356 }() 2357 s.lock.Lock() 2358 // Ensure the response is for a valid request 2359 req, ok := s.accountReqs[id] 2360 if !ok { 2361 // Request stale, perhaps the peer timed out but came through in the end 2362 logger.Warn("Unexpected account range packet") 2363 s.lock.Unlock() 2364 return nil 2365 } 2366 delete(s.accountReqs, id) 2367 s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size)) 2368 2369 // Clean up the request timeout timer, we'll see how to proceed further based 2370 // on the actual delivered content 2371 if !req.timeout.Stop() { 2372 // The timeout is already triggered, and this request will be reverted+rescheduled 2373 s.lock.Unlock() 2374 return nil 2375 } 2376 // Response is valid, but check if peer is signalling that it does not have 2377 // the requested data. For account range queries that means the state being 2378 // retrieved was either already pruned remotely, or the peer is not yet 2379 // synced to our head. 2380 if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { 2381 logger.Debug("Peer rejected account range request", "root", s.root) 2382 s.statelessPeers[peer.ID()] = struct{}{} 2383 s.lock.Unlock() 2384 2385 // Signal this request as failed, and ready for rescheduling 2386 s.scheduleRevertAccountRequest(req) 2387 return nil 2388 } 2389 root := s.root 2390 s.lock.Unlock() 2391 2392 // Reconstruct a partial trie from the response and verify it 2393 keys := make([][]byte, len(hashes)) 2394 for i, key := range hashes { 2395 keys[i] = common.CopyBytes(key[:]) 2396 } 2397 nodes := make(light.NodeList, len(proof)) 2398 for i, node := range proof { 2399 nodes[i] = node 2400 } 2401 proofdb := nodes.NodeSet() 2402 2403 var end []byte 2404 if len(keys) > 0 { 2405 end = keys[len(keys)-1] 2406 } 2407 cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb) 2408 if err != nil { 2409 logger.Warn("Account range failed proof", "err", err) 2410 // Signal this request as failed, and ready for rescheduling 2411 s.scheduleRevertAccountRequest(req) 2412 return err 2413 } 2414 accs := make([]*types.StateAccount, len(accounts)) 2415 for i, account := range accounts { 2416 acc := new(types.StateAccount) 2417 if err := rlp.DecodeBytes(account, acc); err != nil { 2418 panic(err) // We created these blobs, we must be able to decode them 2419 } 2420 accs[i] = acc 2421 } 2422 response := &accountResponse{ 2423 task: req.task, 2424 hashes: hashes, 2425 accounts: accs, 2426 cont: cont, 2427 } 2428 select { 2429 case req.deliver <- response: 2430 case <-req.cancel: 2431 case <-req.stale: 2432 } 2433 return nil 2434 } 2435 2436 // OnByteCodes is a callback method to invoke when a batch of contract 2437 // bytecodes is received from a remote peer. 2438 func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2439 s.lock.RLock() 2440 syncing := !s.snapped 2441 s.lock.RUnlock() 2442 2443 if syncing { 2444 return s.onByteCodes(peer, id, bytecodes) 2445 } 2446 return s.onHealByteCodes(peer, id, bytecodes) 2447 } 2448 2449 // onByteCodes is a callback method to invoke when a batch of contract 2450 // bytecodes is received from a remote peer in the syncing phase.
2451 func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2452 var size common.StorageSize 2453 for _, code := range bytecodes { 2454 size += common.StorageSize(len(code)) 2455 } 2456 logger := peer.Log().New("reqid", id) 2457 logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2458 2459 // Whether or not the response is valid, we can mark the peer as idle and 2460 // notify the scheduler to assign a new task. If the response is invalid, 2461 // we'll drop the peer in a bit. 2462 defer func() { 2463 s.lock.Lock() 2464 defer s.lock.Unlock() 2465 if _, ok := s.peers[peer.ID()]; ok { 2466 s.bytecodeIdlers[peer.ID()] = struct{}{} 2467 } 2468 select { 2469 case s.update <- struct{}{}: 2470 default: 2471 } 2472 }() 2473 s.lock.Lock() 2474 // Ensure the response is for a valid request 2475 req, ok := s.bytecodeReqs[id] 2476 if !ok { 2477 // Request stale, perhaps the peer timed out but came through in the end 2478 logger.Warn("Unexpected bytecode packet") 2479 s.lock.Unlock() 2480 return nil 2481 } 2482 delete(s.bytecodeReqs, id) 2483 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes)) 2484 2485 // Clean up the request timeout timer, we'll see how to proceed further based 2486 // on the actual delivered content 2487 if !req.timeout.Stop() { 2488 // The timeout is already triggered, and this request will be reverted+rescheduled 2489 s.lock.Unlock() 2490 return nil 2491 } 2492 2493 // Response is valid, but check if peer is signalling that it does not have 2494 // the requested data. For bytecode queries that means the peer is not 2495 // yet synced. 2496 if len(bytecodes) == 0 { 2497 logger.Debug("Peer rejected bytecode request") 2498 s.statelessPeers[peer.ID()] = struct{}{} 2499 s.lock.Unlock() 2500 2501 // Signal this request as failed, and ready for rescheduling 2502 s.scheduleRevertBytecodeRequest(req) 2503 return nil 2504 } 2505 s.lock.Unlock() 2506 2507 // Cross reference the requested bytecodes with the response to find gaps 2508 // that the serving node is missing 2509 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2510 hash := make([]byte, 32) 2511 2512 codes := make([][]byte, len(req.hashes)) 2513 for i, j := 0, 0; i < len(bytecodes); i++ { 2514 // Find the next hash that we've been served, leaving misses with nils 2515 hasher.Reset() 2516 hasher.Write(bytecodes[i]) 2517 hasher.Read(hash) 2518 2519 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2520 j++ 2521 } 2522 if j < len(req.hashes) { 2523 codes[j] = bytecodes[i] 2524 j++ 2525 continue 2526 } 2527 // We've either run out of hashes, or got unrequested data 2528 logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i) 2529 // Signal this request as failed, and ready for rescheduling 2530 s.scheduleRevertBytecodeRequest(req) 2531 return errors.New("unexpected bytecode") 2532 } 2533 // Response validated, send it to the scheduler for filling 2534 response := &bytecodeResponse{ 2535 task: req.task, 2536 hashes: req.hashes, 2537 codes: codes, 2538 } 2539 select { 2540 case req.deliver <- response: 2541 case <-req.cancel: 2542 case <-req.stale: 2543 } 2544 return nil 2545 } 2546 2547 // OnStorage is a callback method to invoke when ranges of storage slots 2548 // are received from a remote peer.
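// Editorial sketch: the cross-referencing loop above (repeated for healing
// trienodes and bytecodes further below) relies on responses preserving
// request order. matchDeliveries is a hypothetical generic form of that
// two-pointer walk: misses stay nil, unmatched items invalidate the response.
func matchDeliveries(requested []common.Hash, items [][]byte, hashOf func([]byte) common.Hash) ([][]byte, error) {
	out := make([][]byte, len(requested))
	for i, j := 0, 0; i < len(items); i++ {
		h := hashOf(items[i])
		for j < len(requested) && h != requested[j] {
			j++ // requested hash not served, leave its slot nil
		}
		if j == len(requested) {
			return nil, errors.New("unrequested or out-of-order item")
		}
		out[j] = items[i]
		j++
	}
	return out, nil
}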
2549 func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { 2550 // Gather some trace stats to aid in debugging issues 2551 var ( 2552 hashCount int 2553 slotCount int 2554 size common.StorageSize 2555 ) 2556 for _, hashset := range hashes { 2557 size += common.StorageSize(common.HashLength * len(hashset)) 2558 hashCount += len(hashset) 2559 } 2560 for _, slotset := range slots { 2561 for _, slot := range slotset { 2562 size += common.StorageSize(len(slot)) 2563 } 2564 slotCount += len(slotset) 2565 } 2566 for _, node := range proof { 2567 size += common.StorageSize(len(node)) 2568 } 2569 logger := peer.Log().New("reqid", id) 2570 logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) 2571 2572 // Whether or not the response is valid, we can mark the peer as idle and 2573 // notify the scheduler to assign a new task. If the response is invalid, 2574 // we'll drop the peer in a bit. 2575 defer func() { 2576 s.lock.Lock() 2577 defer s.lock.Unlock() 2578 if _, ok := s.peers[peer.ID()]; ok { 2579 s.storageIdlers[peer.ID()] = struct{}{} 2580 } 2581 select { 2582 case s.update <- struct{}{}: 2583 default: 2584 } 2585 }() 2586 s.lock.Lock() 2587 // Ensure the response is for a valid request 2588 req, ok := s.storageReqs[id] 2589 if !ok { 2590 // Request stale, perhaps the peer timed out but came through in the end 2591 logger.Warn("Unexpected storage ranges packet") 2592 s.lock.Unlock() 2593 return nil 2594 } 2595 delete(s.storageReqs, id) 2596 s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size)) 2597 2598 // Clean up the request timeout timer, we'll see how to proceed further based 2599 // on the actual delivered content 2600 if !req.timeout.Stop() { 2601 // The timeout is already triggered, and this request will be reverted+rescheduled 2602 s.lock.Unlock() 2603 return nil 2604 } 2605 2606 // Reject the response if the hash sets and slot sets don't match, or if the 2607 // peer sent more data than requested. 2608 if len(hashes) != len(slots) { 2609 s.lock.Unlock() 2610 s.scheduleRevertStorageRequest(req) // reschedule request 2611 logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) 2612 return errors.New("hash and slot set size mismatch") 2613 } 2614 if len(hashes) > len(req.accounts) { 2615 s.lock.Unlock() 2616 s.scheduleRevertStorageRequest(req) // reschedule request 2617 logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) 2618 return errors.New("hash set larger than requested") 2619 } 2620 // Response is valid, but check if peer is signalling that it does not have 2621 // the requested data. For storage range queries that means the state being 2622 // retrieved was either already pruned remotely, or the peer is not yet 2623 // synced to our head. 
2624 if len(hashes) == 0 { 2625 logger.Debug("Peer rejected storage request") 2626 s.statelessPeers[peer.ID()] = struct{}{} 2627 s.lock.Unlock() 2628 s.scheduleRevertStorageRequest(req) // reschedule request 2629 return nil 2630 } 2631 s.lock.Unlock() 2632 2633 // Reconstruct the partial tries from the response and verify them 2634 var cont bool 2635 2636 for i := 0; i < len(hashes); i++ { 2637 // Convert the keys and proofs into an internal format 2638 keys := make([][]byte, len(hashes[i])) 2639 for j, key := range hashes[i] { 2640 keys[j] = common.CopyBytes(key[:]) 2641 } 2642 nodes := make(light.NodeList, 0, len(proof)) 2643 if i == len(hashes)-1 { 2644 for _, node := range proof { 2645 nodes = append(nodes, node) 2646 } 2647 } 2648 var err error 2649 if len(nodes) == 0 { 2650 // No proof has been attached, the response must cover the entire key 2651 // space and hash to the origin root. 2652 _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) 2653 if err != nil { 2654 s.scheduleRevertStorageRequest(req) // reschedule request 2655 logger.Warn("Storage slots failed proof", "err", err) 2656 return err 2657 } 2658 } else { 2659 // A proof was attached, the response is only partial, check that the 2660 // returned data is indeed part of the storage trie 2661 proofdb := nodes.NodeSet() 2662 2663 var end []byte 2664 if len(keys) > 0 { 2665 end = keys[len(keys)-1] 2666 } 2667 cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) 2668 if err != nil { 2669 s.scheduleRevertStorageRequest(req) // reschedule request 2670 logger.Warn("Storage range failed proof", "err", err) 2671 return err 2672 } 2673 } 2674 } 2675 // Partial tries reconstructed, send them to the scheduler for storage filling 2676 response := &storageResponse{ 2677 mainTask: req.mainTask, 2678 subTask: req.subTask, 2679 accounts: req.accounts, 2680 roots: req.roots, 2681 hashes: hashes, 2682 slots: slots, 2683 cont: cont, 2684 } 2685 select { 2686 case req.deliver <- response: 2687 case <-req.cancel: 2688 case <-req.stale: 2689 } 2690 return nil 2691 } 2692 2693 // OnTrieNodes is a callback method to invoke when a batch of trie nodes 2694 // are received from a remote peer. 2695 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2696 var size common.StorageSize 2697 for _, node := range trienodes { 2698 size += common.StorageSize(len(node)) 2699 } 2700 logger := peer.Log().New("reqid", id) 2701 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2702 2703 // Whether or not the response is valid, we can mark the peer as idle and 2704 // notify the scheduler to assign a new task. If the response is invalid, 2705 // we'll drop the peer in a bit. 
2706 defer func() { 2707 s.lock.Lock() 2708 defer s.lock.Unlock() 2709 if _, ok := s.peers[peer.ID()]; ok { 2710 s.trienodeHealIdlers[peer.ID()] = struct{}{} 2711 } 2712 select { 2713 case s.update <- struct{}{}: 2714 default: 2715 } 2716 }() 2717 s.lock.Lock() 2718 // Ensure the response is for a valid request 2719 req, ok := s.trienodeHealReqs[id] 2720 if !ok { 2721 // Request stale, perhaps the peer timed out but came through in the end 2722 logger.Warn("Unexpected trienode heal packet") 2723 s.lock.Unlock() 2724 return nil 2725 } 2726 delete(s.trienodeHealReqs, id) 2727 s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes)) 2728 2729 // Clean up the request timeout timer, we'll see how to proceed further based 2730 // on the actual delivered content 2731 if !req.timeout.Stop() { 2732 // The timeout is already triggered, and this request will be reverted+rescheduled 2733 s.lock.Unlock() 2734 return nil 2735 } 2736 2737 // Response is valid, but check if peer is signalling that it does not have 2738 // the requested data. For trienode heal queries that means the peer is not 2739 // yet synced. 2740 if len(trienodes) == 0 { 2741 logger.Debug("Peer rejected trienode heal request") 2742 s.statelessPeers[peer.ID()] = struct{}{} 2743 s.lock.Unlock() 2744 2745 // Signal this request as failed, and ready for rescheduling 2746 s.scheduleRevertTrienodeHealRequest(req) 2747 return nil 2748 } 2749 s.lock.Unlock() 2750 2751 // Cross reference the requested trienodes with the response to find gaps 2752 // that the serving node is missing 2753 var ( 2754 hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState) 2755 hash = make([]byte, 32) 2756 nodes = make([][]byte, len(req.hashes)) 2757 fills uint64 2758 ) 2759 for i, j := 0, 0; i < len(trienodes); i++ { 2760 // Find the next hash that we've been served, leaving misses with nils 2761 hasher.Reset() 2762 hasher.Write(trienodes[i]) 2763 hasher.Read(hash) 2764 2765 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2766 j++ 2767 } 2768 if j < len(req.hashes) { 2769 nodes[j] = trienodes[i] 2770 fills++ 2771 j++ 2772 continue 2773 } 2774 // We've either run out of hashes, or got unrequested data 2775 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) 2776 2777 // Signal this request as failed, and ready for rescheduling 2778 s.scheduleRevertTrienodeHealRequest(req) 2779 return errors.New("unexpected healing trienode") 2780 } 2781 // Response validated, send it to the scheduler for filling 2782 s.trienodeHealPend.Add(fills) 2783 defer func() { 2784 s.trienodeHealPend.Add(^(fills - 1)) 2785 }() 2786 response := &trienodeHealResponse{ 2787 paths: req.paths, 2788 task: req.task, 2789 hashes: req.hashes, 2790 nodes: nodes, 2791 } 2792 select { 2793 case req.deliver <- response: 2794 case <-req.cancel: 2795 case <-req.stale: 2796 } 2797 return nil 2798 } 2799 2800 // onHealByteCodes is a callback method to invoke when a batch of contract 2801 // bytecodes is received from a remote peer in the healing phase. 2802 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2803 var size common.StorageSize 2804 for _, code := range bytecodes { 2805 size += common.StorageSize(len(code)) 2806 } 2807 logger := peer.Log().New("reqid", id) 2808 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2809 2810 // Whether or not the response is valid, we can mark the peer as idle and 2811 // notify the scheduler to assign a new task.
If the response is invalid, 2812 // we'll drop the peer in a bit. 2813 defer func() { 2814 s.lock.Lock() 2815 defer s.lock.Unlock() 2816 if _, ok := s.peers[peer.ID()]; ok { 2817 s.bytecodeHealIdlers[peer.ID()] = struct{}{} 2818 } 2819 select { 2820 case s.update <- struct{}{}: 2821 default: 2822 } 2823 }() 2824 s.lock.Lock() 2825 // Ensure the response is for a valid request 2826 req, ok := s.bytecodeHealReqs[id] 2827 if !ok { 2828 // Request stale, perhaps the peer timed out but came through in the end 2829 logger.Warn("Unexpected bytecode heal packet") 2830 s.lock.Unlock() 2831 return nil 2832 } 2833 delete(s.bytecodeHealReqs, id) 2834 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes)) 2835 2836 // Clean up the request timeout timer, we'll see how to proceed further based 2837 // on the actual delivered content 2838 if !req.timeout.Stop() { 2839 // The timeout is already triggered, and this request will be reverted+rescheduled 2840 s.lock.Unlock() 2841 return nil 2842 } 2843 2844 // Response is valid, but check if peer is signalling that it does not have 2845 // the requested data. For bytecode heal queries that means the peer is not 2846 // yet synced. 2847 if len(bytecodes) == 0 { 2848 logger.Debug("Peer rejected bytecode heal request") 2849 s.statelessPeers[peer.ID()] = struct{}{} 2850 s.lock.Unlock() 2851 2852 // Signal this request as failed, and ready for rescheduling 2853 s.scheduleRevertBytecodeHealRequest(req) 2854 return nil 2855 } 2856 s.lock.Unlock() 2857 2858 // Cross reference the requested bytecodes with the response to find gaps 2859 // that the serving node is missing 2860 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2861 hash := make([]byte, 32) 2862 2863 codes := make([][]byte, len(req.hashes)) 2864 for i, j := 0, 0; i < len(bytecodes); i++ { 2865 // Find the next hash that we've been served, leaving misses with nils 2866 hasher.Reset() 2867 hasher.Write(bytecodes[i]) 2868 hasher.Read(hash) 2869 2870 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2871 j++ 2872 } 2873 if j < len(req.hashes) { 2874 codes[j] = bytecodes[i] 2875 j++ 2876 continue 2877 } 2878 // We've either run out of hashes, or got unrequested data 2879 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) 2880 // Signal this request as failed, and ready for rescheduling 2881 s.scheduleRevertBytecodeHealRequest(req) 2882 return errors.New("unexpected healing bytecode") 2883 } 2884 // Response validated, send it to the scheduler for filling 2885 response := &bytecodeHealResponse{ 2886 task: req.task, 2887 hashes: req.hashes, 2888 codes: codes, 2889 } 2890 select { 2891 case req.deliver <- response: 2892 case <-req.cancel: 2893 case <-req.stale: 2894 } 2895 return nil 2896 } 2897 2898 // onHealState is a callback method to invoke when a flat state (account 2899 // or storage slot) is downloaded during the healing stage. The flat states 2900 // can be persisted blindly and can be fixed later in the generation stage. 2901 // Note, it is not concurrency safe, so synchronization must be handled by the caller.
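// Editorial note on OnTrieNodes above: s.trienodeHealPend.Add(^(fills - 1))
// is the idiomatic way to subtract from an unsigned atomic counter, since
// ^(x-1) equals -x in two's complement arithmetic. A hypothetical sketch:
func atomicSub(counter *atomic.Uint64, delta uint64) {
	counter.Add(^(delta - 1)) // e.g. delta = 3: ^(2) wraps to -3 mod 2^64
}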
2902 func (s *Syncer) onHealState(paths [][]byte, value []byte) error { 2903 if len(paths) == 1 { 2904 var account types.StateAccount 2905 if err := rlp.DecodeBytes(value, &account); err != nil { 2906 return nil // Returning the error here would drop the remote peer 2907 } 2908 blob := types.SlimAccountRLP(account) 2909 rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob) 2910 s.accountHealed += 1 2911 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob)) 2912 } 2913 if len(paths) == 2 { 2914 rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value) 2915 s.storageHealed += 1 2916 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value)) 2917 } 2918 if s.stateWriter.ValueSize() > zonddb.IdealBatchSize { 2919 s.stateWriter.Write() // It's fine to ignore the error here 2920 s.stateWriter.Reset() 2921 } 2922 return nil 2923 } 2924 2925 // hashSpace is the total size of the 256 bit hash space for accounts. 2926 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) 2927 2928 // report calculates various status reports and provides it to the user. 2929 func (s *Syncer) report(force bool) { 2930 if len(s.tasks) > 0 { 2931 s.reportSyncProgress(force) 2932 return 2933 } 2934 s.reportHealProgress(force) 2935 } 2936 2937 // reportSyncProgress calculates various status reports and provides it to the user. 2938 func (s *Syncer) reportSyncProgress(force bool) { 2939 // Don't report all the events, just occasionally 2940 if !force && time.Since(s.logTime) < 8*time.Second { 2941 return 2942 } 2943 // Don't report anything until we have a meaningful progress 2944 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes 2945 if synced == 0 { 2946 return 2947 } 2948 accountGaps := new(big.Int) 2949 for _, task := range s.tasks { 2950 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big())) 2951 } 2952 accountFills := new(big.Int).Sub(hashSpace, accountGaps) 2953 if accountFills.BitLen() == 0 { 2954 return 2955 } 2956 s.logTime = time.Now() 2957 estBytes := float64(new(big.Int).Div( 2958 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), 2959 accountFills, 2960 ).Uint64()) 2961 // Don't report anything until we have a meaningful progress 2962 if estBytes < 1.0 { 2963 return 2964 } 2965 elapsed := time.Since(s.startTime) 2966 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes) 2967 2968 // Create a mega progress report 2969 var ( 2970 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes) 2971 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountSynced), s.accountBytes.TerminalString()) 2972 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageSynced), s.storageBytes.TerminalString()) 2973 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeSynced), s.bytecodeBytes.TerminalString()) 2974 ) 2975 log.Info("Syncing: state download in progress", "synced", progress, "state", synced, 2976 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) 2977 } 2978 2979 // reportHealProgress calculates various status reports and provides it to the user. 
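// Editorial sketch of the extrapolation in reportSyncProgress above: the
// downloaded byte count is scaled by the fraction of the 2^256 account hash
// space the tasks have covered, so the estimated total is
// synced * hashSpace / accountFills. estimateTotalBytes is a hypothetical
// helper; e.g. 10 GiB synced with a quarter of the hash space filled
// extrapolates to ~40 GiB of total state.
func estimateTotalBytes(synced common.StorageSize, accountFills *big.Int) float64 {
	est := new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace)
	est.Div(est, accountFills)
	return float64(est.Uint64())
}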
2980 func (s *Syncer) reportHealProgress(force bool) { 2981 // Don't report all the events, just occasionally 2982 if !force && time.Since(s.logTime) < 8*time.Second { 2983 return 2984 } 2985 s.logTime = time.Now() 2986 2987 // Create a mega progress report 2988 var ( 2989 trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString()) 2990 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString()) 2991 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString()) 2992 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString()) 2993 ) 2994 log.Info("Syncing: state healing in progress", "accounts", accounts, "slots", storage, 2995 "codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending()) 2996 } 2997 2998 // estimateRemainingSlots tries to determine roughly how many slots are left in 2999 // a contract storage, based on the number of keys and the last hash. This method 3000 // assumes that the hashes are lexicographically ordered and evenly distributed. 3001 func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) { 3002 if last == (common.Hash{}) { 3003 return 0, errors.New("last hash empty") 3004 } 3005 space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes))) 3006 space.Div(space, last.Big()) 3007 if !space.IsUint64() { 3008 // Gigantic address space probably due to too few or malicious slots 3009 return 0, errors.New("too few slots for estimation") 3010 } 3011 return space.Uint64() - uint64(hashes), nil 3012 } 3013 3014 // capacitySort implements the Sort interface, allowing sorting by peer message 3015 // throughput. Note, callers should use sort.Reverse to get the desired effect 3016 // of highest capacity being at the front. 3017 type capacitySort struct { 3018 ids []string 3019 caps []int 3020 } 3021 3022 func (s *capacitySort) Len() int { 3023 return len(s.ids) 3024 } 3025 3026 func (s *capacitySort) Less(i, j int) bool { 3027 return s.caps[i] < s.caps[j] 3028 } 3029 3030 func (s *capacitySort) Swap(i, j int) { 3031 s.ids[i], s.ids[j] = s.ids[j], s.ids[i] 3032 s.caps[i], s.caps[j] = s.caps[j], s.caps[i] 3033 } 3034 3035 // healRequestSort implements the Sort interface, allowing sorting trienode 3036 // heal requests, which is a prerequisite for merging storage-requests. 3037 type healRequestSort struct { 3038 paths []string 3039 hashes []common.Hash 3040 syncPaths []trie.SyncPath 3041 } 3042 3043 func (t *healRequestSort) Len() int { 3044 return len(t.hashes) 3045 } 3046 3047 func (t *healRequestSort) Less(i, j int) bool { 3048 a := t.syncPaths[i] 3049 b := t.syncPaths[j] 3050 switch bytes.Compare(a[0], b[0]) { 3051 case -1: 3052 return true 3053 case 1: 3054 return false 3055 } 3056 // identical first part 3057 if len(a) < len(b) { 3058 return true 3059 } 3060 if len(b) < len(a) { 3061 return false 3062 } 3063 if len(a) == 2 { 3064 return bytes.Compare(a[1], b[1]) < 0 3065 } 3066 return false 3067 } 3068 3069 func (t *healRequestSort) Swap(i, j int) { 3070 t.paths[i], t.paths[j] = t.paths[j], t.paths[i] 3071 t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i] 3072 t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i] 3073 } 3074 3075 // Merge merges the pathsets, so that several storage requests concerning the 3076 // same account are merged into one, to reduce bandwidth. 
3077 // OBS: This operation is moot if t has not first been sorted. 3078 func (t *healRequestSort) Merge() []TrieNodePathSet { 3079 var result []TrieNodePathSet 3080 for _, path := range t.syncPaths { 3081 pathset := TrieNodePathSet(path) 3082 if len(path) == 1 { 3083 // It's an account reference. 3084 result = append(result, pathset) 3085 } else { 3086 // It's a storage reference. 3087 end := len(result) - 1 3088 if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) { 3089 // The account doesn't match last, create a new entry. 3090 result = append(result, pathset) 3091 } else { 3092 // It's the same account as the previous one, add to the storage 3093 // paths of that request. 3094 result[end] = append(result[end], pathset[1]) 3095 } 3096 } 3097 } 3098 return result 3099 } 3100 3101 // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates 3102 // the TrieNodePathSets and merges paths which belong to the same account path. 3103 func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) { 3104 var syncPaths []trie.SyncPath 3105 for _, path := range paths { 3106 syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path))) 3107 } 3108 n := &healRequestSort{paths, hashes, syncPaths} 3109 sort.Sort(n) 3110 pathsets := n.Merge() 3111 return n.paths, n.hashes, n.syncPaths, pathsets 3112 }
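// Editorial usage sketch for sortByAccountPath/Merge, with fabricated values:
// given three already sorted storage sync paths
//   {acctA, 0x01}, {acctA, 0x02}, {acctB, 0x03}
// Merge collapses the two acctA entries into a single request, producing
//   TrieNodePathSet{acctA, 0x01, 0x02} and TrieNodePathSet{acctB, 0x03},
// so one GetTrieNodes entry can fetch several storage-trie nodes of the same
// account instead of repeating the account path once per node.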