// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snap

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"math/big"
	"math/rand"
	"sync"
	"time"

	"github.com/juliankolbe/go-ethereum/common"
	"github.com/juliankolbe/go-ethereum/core/rawdb"
	"github.com/juliankolbe/go-ethereum/core/state"
	"github.com/juliankolbe/go-ethereum/crypto"
	"github.com/juliankolbe/go-ethereum/ethdb"
	"github.com/juliankolbe/go-ethereum/event"
	"github.com/juliankolbe/go-ethereum/light"
	"github.com/juliankolbe/go-ethereum/log"
	"github.com/juliankolbe/go-ethereum/rlp"
	"github.com/juliankolbe/go-ethereum/trie"
	"golang.org/x/crypto/sha3"
)

var (
	// emptyRoot is the known root hash of an empty trie.
	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

	// emptyCode is the known hash of the empty EVM bytecode.
	emptyCode = crypto.Keccak256Hash(nil)
)

const (
	// maxRequestSize is the maximum number of bytes to request from a remote peer.
	maxRequestSize = 512 * 1024

	// maxStorageSetRequestCount is the maximum number of contracts to request the
	// storage of in a single query. If this number is too low, we're not filling
	// responses fully and waste round trip times. If it's too high, we're capping
	// responses and waste bandwidth.
	maxStorageSetRequestCount = maxRequestSize / 1024

	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
	// single query. If this number is too low, we're not filling responses fully
	// and waste round trip times. If it's too high, we're capping responses and
	// waste bandwidth.
	//
	// Deployed bytecodes are currently capped at 24KB, so the minimum request
	// size should be maxRequestSize / 24K. Assuming that most contracts do not
	// come close to that, requesting 4x should be a good approximation.
	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4

	// maxTrieRequestCount is the maximum number of trie node blobs to request in
	// a single query. If this number is too low, we're not filling responses fully
	// and waste round trip times. If it's too high, we're capping responses and
	// waste bandwidth.
	maxTrieRequestCount = 512

	// accountConcurrency is the number of chunks to split the account trie into
	// to allow concurrent retrievals.
	accountConcurrency = 16

	// storageConcurrency is the number of chunks to split a large contract
	// storage trie into to allow concurrent retrievals.
	storageConcurrency = 16
)

var (
	// requestTimeout is the maximum time a peer is allowed to spend on serving
	// a single network request.
	requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync?
)

// ErrCancelled is returned from snap syncing if the operation was prematurely
// terminated.
var ErrCancelled = errors.New("sync cancelled")

// accountRequest tracks a pending account range request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: account requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type accountRequest struct {
	peer string // Peer to which this request is assigned
	id   uint64 // Request ID of this request

	cancel  chan struct{} // Channel to track sync cancellation
	timeout *time.Timer   // Timer to track delivery timeout
	stale   chan struct{} // Channel to signal the request was dropped

	origin common.Hash // First account requested to allow continuation checks
	limit  common.Hash // Last account requested to allow non-overlapping chunking

	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
}

// accountResponse is an already Merkle-verified remote response to an account
// range request. It contains the subtrie for the requested account range and
// the database that's going to be filled with the internal nodes on commit.
type accountResponse struct {
	task *accountTask // Task which this response is filling

	hashes   []common.Hash    // Account hashes in the returned range
	accounts []*state.Account // Expanded accounts in the returned range

	nodes ethdb.KeyValueStore // Database containing the reconstructed trie nodes
	trie  *trie.Trie          // Reconstructed trie to reject incomplete account paths

	bounds   map[common.Hash]struct{} // Boundary nodes to avoid persisting incomplete accounts
	overflow *light.NodeSet           // Overflow nodes to avoid persisting across chunk boundaries

	cont bool // Whether the account range has a continuation
}

// bytecodeRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeRequest struct {
	peer string // Peer to which this request is assigned
	id   uint64 // Request ID of this request

	cancel  chan struct{} // Channel to track sync cancellation
	timeout *time.Timer   // Timer to track delivery timeout
	stale   chan struct{} // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeResponse is an already verified remote response to a bytecode request.
type bytecodeResponse struct {
	task *accountTask // Task which this response is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}

// storageRequest tracks a pending storage ranges request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: storage requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. tasks). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type storageRequest struct {
	peer string // Peer to which this request is assigned
	id   uint64 // Request ID of this request

	cancel  chan struct{} // Channel to track sync cancellation
	timeout *time.Timer   // Timer to track delivery timeout
	stale   chan struct{} // Channel to signal the request was dropped

	accounts []common.Hash // Account hashes to validate responses
	roots    []common.Hash // Storage roots to validate responses

	origin common.Hash // First storage slot requested to allow continuation checks
	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking

	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
}

// storageResponse is an already Merkle-verified remote response to a storage
// range request. It contains the subtries for the requested storage ranges and
// the databases that are going to be filled with the internal nodes on commit.
type storageResponse struct {
	mainTask *accountTask // Task which this response belongs to
	subTask  *storageTask // Task which this response is filling

	accounts []common.Hash // Account hashes requested, may be only partially filled
	roots    []common.Hash // Storage roots requested, may be only partially filled

	hashes [][]common.Hash       // Storage slot hashes in the returned range
	slots  [][][]byte            // Storage slot values in the returned range
	nodes  []ethdb.KeyValueStore // Database containing the reconstructed trie nodes
	tries  []*trie.Trie          // Reconstructed tries to reject overflown slots

	// Fields relevant for the last account only
	bounds   map[common.Hash]struct{} // Boundary nodes to avoid persisting (incomplete)
	overflow *light.NodeSet           // Overflow nodes to avoid persisting across chunk boundaries
	cont     bool                     // Whether the last storage range has a continuation
}

// trienodeHealRequest tracks a pending state trie request to ensure responses
// are to actual requests and to validate any security constraints.
//
// Concurrency note: trie node requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type trienodeHealRequest struct {
	peer string // Peer to which this request is assigned
	id   uint64 // Request ID of this request

	cancel  chan struct{} // Channel to track sync cancellation
	timeout *time.Timer   // Timer to track delivery timeout
	stale   chan struct{} // Channel to signal the request was dropped

	hashes []common.Hash   // Trie node hashes to validate responses
	paths  []trie.SyncPath // Trie node paths requested for rescheduling

	task *healTask // Task which this request is filling (only access fields through the runloop!!)
}

// trienodeHealResponse is an already verified remote response to a trie node request.
type trienodeHealResponse struct {
	task *healTask // Task which this response is filling

	hashes []common.Hash   // Hashes of the trie nodes to avoid double hashing
	paths  []trie.SyncPath // Trie node paths requested for rescheduling missing ones
	nodes  [][]byte        // Actual trie nodes to store into the database (nil = missing)
}

// bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeHealRequest struct {
	peer string // Peer to which this request is assigned
	id   uint64 // Request ID of this request

	cancel  chan struct{} // Channel to track sync cancellation
	timeout *time.Timer   // Timer to track delivery timeout
	stale   chan struct{} // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeHealResponse is an already verified remote response to a bytecode request.
type bytecodeHealResponse struct {
	task *healTask // Task which this response is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}

// accountTask represents the sync task for a chunk of the account snapshot.
type accountTask struct {
	// These fields get serialized to leveldb on shutdown
	Next     common.Hash                    // Next account to sync in this interval
	Last     common.Hash                    // Last account to sync in this interval
	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts

	// These fields are internals used during runtime
	req  *accountRequest  // Pending request to fill this task
	res  *accountResponse // Validated response filling this task
	pend int              // Number of pending subtasks for this round

	needCode  []bool // Flags whether the filling accounts need code retrieval
	needState []bool // Flags whether the filling accounts need storage retrieval
	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing

	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval

	done bool // Flag whether the task can be removed
}

// storageTask represents the sync task for a chunk of the storage snapshot.
type storageTask struct {
	// Exported so they survive serialization as part of accountTask.SubTasks
	Next common.Hash // Next storage slot to sync in this interval
	Last common.Hash // Last storage slot to sync in this interval

	// These fields are internals used during runtime
	root common.Hash     // Storage root hash for this instance
	req  *storageRequest // Pending request to fill this task
	done bool            // Flag whether the task can be removed
}

// healTask represents the sync task for healing the snap-synced chunk boundaries.
type healTask struct {
	scheduler *trie.Sync // State trie sync scheduler defining the tasks

	trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval
	codeTasks map[common.Hash]struct{}      // Set of byte code tasks currently queued for retrieval
}

// syncProgress is a database entry to allow suspending and resuming a snapshot state
// sync. Opposed to full and fast sync, there is no way to restart a suspended
// snap sync without prior knowledge of the suspension point.
type syncProgress struct {
	Tasks []*accountTask // The suspended account tasks (contract tasks within)

	// Status report during syncing phase
	AccountSynced  uint64             // Number of accounts downloaded
	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	BytecodeSynced uint64             // Number of bytecodes downloaded
	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	StorageSynced  uint64             // Number of storage slots downloaded
	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	// Status report during healing phase
	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	TrienodeHealDups   uint64             // Number of state trie nodes already processed
	TrienodeHealNops   uint64             // Number of state trie nodes not requested
	BytecodeHealSynced uint64             // Number of bytecodes downloaded
	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
	BytecodeHealDups   uint64             // Number of bytecodes already processed
	BytecodeHealNops   uint64             // Number of bytecodes not requested
}

// SyncPeer abstracts out the methods required for a peer to be synced against
// with the goal of allowing the construction of mock peers without the full
// blown networking.
type SyncPeer interface {
	// ID retrieves the peer's unique identifier.
	ID() string

	// RequestAccountRange fetches a batch of accounts rooted in a specific account
	// trie, starting with the origin.
	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error

	// RequestStorageRanges fetches a batch of storage slots belonging to one or
	// more accounts. If slots from only one account are requested, an origin marker
	// may also be used to retrieve from there.
	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error

	// RequestByteCodes fetches a batch of bytecodes by hash.
	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error

	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
	// a specific state trie.
	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error

	// Log retrieves the peer's own contextual logger.
	Log() log.Logger
}

// Syncer is an Ethereum account and storage trie syncer based on snapshots and
// the snap protocol. Its purpose is to download all the accounts and storage
// slots from remote peers and reassemble chunks of the state trie, on top of
// which a state sync can be run to fix any gaps / overlaps.
//
// Every network request has a variety of failure events:
//   - The peer disconnects after task assignment, failing to send the request
//   - The peer disconnects after sending the request, before delivering on it
//   - The peer remains connected, but does not deliver a response in time
//   - The peer delivers a stale response after a previous timeout
//   - The peer delivers a refusal to serve the requested state
type Syncer struct {
	db    ethdb.KeyValueStore // Database to store the trie nodes into (and dedup)
	bloom *trie.SyncBloom     // Bloom filter to deduplicate nodes for state fixup

	root    common.Hash    // Current state trie root being synced
	tasks   []*accountTask // Current account task set being synced
	snapped bool           // Flag to signal that snap phase is done
	healer  *healTask      // Current state healing task being executed
	update  chan struct{}  // Notification channel for possible sync progression

	peers    map[string]SyncPeer // Currently active peers to download from
	peerJoin *event.Feed         // Event feed to react to peers joining
	peerDrop *event.Feed         // Event feed to react to peers dropping

	// Request tracking during syncing phase
	statelessPeers map[string]struct{} // Peers that failed to deliver state data
	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests

	accountReqs  map[uint64]*accountRequest  // Account requests currently running
	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
	storageReqs  map[uint64]*storageRequest  // Storage requests currently running

	accountReqFails  chan *accountRequest  // Failed account range requests to revert
	bytecodeReqFails chan *bytecodeRequest // Failed bytecode requests to revert
	storageReqFails  chan *storageRequest  // Failed storage requests to revert

	accountResps  chan *accountResponse  // Account sub-tries to integrate into the database
	bytecodeResps chan *bytecodeResponse // Bytecodes to integrate into the database
	storageResps  chan *storageResponse  // Storage sub-tries to integrate into the database

	accountSynced  uint64             // Number of accounts downloaded
	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	bytecodeSynced uint64             // Number of bytecodes downloaded
	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	storageSynced  uint64             // Number of storage slots downloaded
	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	// Request tracking during healing phase
	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests

	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running

	trienodeHealReqFails chan *trienodeHealRequest // Failed trienode requests to revert
	bytecodeHealReqFails chan *bytecodeHealRequest // Failed bytecode requests to revert

	trienodeHealResps chan *trienodeHealResponse // Trie nodes to integrate into the database
	bytecodeHealResps chan *bytecodeHealResponse // Bytecodes to integrate into the database

	trienodeHealSynced uint64             // Number of state trie nodes downloaded
	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	trienodeHealDups   uint64             // Number of state trie nodes already processed
	trienodeHealNops   uint64             // Number of state trie nodes not requested
	bytecodeHealSynced uint64             // Number of bytecodes downloaded
	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
	bytecodeHealDups   uint64             // Number of bytecodes already processed
	bytecodeHealNops   uint64             // Number of bytecodes not requested

	startTime time.Time   // Time instance when snapshot sync started
	startAcc  common.Hash // Account hash where sync started from
	logTime   time.Time   // Time instance when status was last reported

	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
}

// NewSyncer creates a new snapshot syncer to download the Ethereum state over the
// snap protocol.
449 func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { 450 return &Syncer{ 451 db: db, 452 bloom: bloom, 453 454 peers: make(map[string]SyncPeer), 455 peerJoin: new(event.Feed), 456 peerDrop: new(event.Feed), 457 update: make(chan struct{}, 1), 458 459 accountIdlers: make(map[string]struct{}), 460 storageIdlers: make(map[string]struct{}), 461 bytecodeIdlers: make(map[string]struct{}), 462 463 accountReqs: make(map[uint64]*accountRequest), 464 storageReqs: make(map[uint64]*storageRequest), 465 bytecodeReqs: make(map[uint64]*bytecodeRequest), 466 accountReqFails: make(chan *accountRequest), 467 storageReqFails: make(chan *storageRequest), 468 bytecodeReqFails: make(chan *bytecodeRequest), 469 accountResps: make(chan *accountResponse), 470 storageResps: make(chan *storageResponse), 471 bytecodeResps: make(chan *bytecodeResponse), 472 473 trienodeHealIdlers: make(map[string]struct{}), 474 bytecodeHealIdlers: make(map[string]struct{}), 475 476 trienodeHealReqs: make(map[uint64]*trienodeHealRequest), 477 bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), 478 trienodeHealReqFails: make(chan *trienodeHealRequest), 479 bytecodeHealReqFails: make(chan *bytecodeHealRequest), 480 trienodeHealResps: make(chan *trienodeHealResponse), 481 bytecodeHealResps: make(chan *bytecodeHealResponse), 482 } 483 } 484 485 // Register injects a new data source into the syncer's peerset. 
486 func (s *Syncer) Register(peer SyncPeer) error { 487 // Make sure the peer is not registered yet 488 id := peer.ID() 489 490 s.lock.Lock() 491 if _, ok := s.peers[id]; ok { 492 log.Error("Snap peer already registered", "id", id) 493 494 s.lock.Unlock() 495 return errors.New("already registered") 496 } 497 s.peers[id] = peer 498 499 // Mark the peer as idle, even if no sync is running 500 s.accountIdlers[id] = struct{}{} 501 s.storageIdlers[id] = struct{}{} 502 s.bytecodeIdlers[id] = struct{}{} 503 s.trienodeHealIdlers[id] = struct{}{} 504 s.bytecodeHealIdlers[id] = struct{}{} 505 s.lock.Unlock() 506 507 // Notify any active syncs that a new peer can be assigned data 508 s.peerJoin.Send(id) 509 return nil 510 } 511 512 // Unregister injects a new data source into the syncer's peerset. 513 func (s *Syncer) Unregister(id string) error { 514 // Remove all traces of the peer from the registry 515 s.lock.Lock() 516 if _, ok := s.peers[id]; !ok { 517 log.Error("Snap peer not registered", "id", id) 518 519 s.lock.Unlock() 520 return errors.New("not registered") 521 } 522 delete(s.peers, id) 523 524 // Remove status markers, even if no sync is running 525 delete(s.statelessPeers, id) 526 527 delete(s.accountIdlers, id) 528 delete(s.storageIdlers, id) 529 delete(s.bytecodeIdlers, id) 530 delete(s.trienodeHealIdlers, id) 531 delete(s.bytecodeHealIdlers, id) 532 s.lock.Unlock() 533 534 // Notify any active syncs that pending requests need to be reverted 535 s.peerDrop.Send(id) 536 return nil 537 } 538 539 // Sync starts (or resumes a previous) sync cycle to iterate over an state trie 540 // with the given root and reconstruct the nodes based on the snapshot leaves. 541 // Previously downloaded segments will not be redownloaded of fixed, rather any 542 // errors will be healed after the leaves are fully accumulated. 
543 func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { 544 // Move the trie root from any previous value, revert stateless markers for 545 // any peers and initialize the syncer if it was not yet run 546 s.lock.Lock() 547 s.root = root 548 s.healer = &healTask{ 549 scheduler: state.NewStateSync(root, s.db, s.bloom), 550 trieTasks: make(map[common.Hash]trie.SyncPath), 551 codeTasks: make(map[common.Hash]struct{}), 552 } 553 s.statelessPeers = make(map[string]struct{}) 554 s.lock.Unlock() 555 556 if s.startTime == (time.Time{}) { 557 s.startTime = time.Now() 558 } 559 // Retrieve the previous sync status from LevelDB and abort if already synced 560 s.loadSyncStatus() 561 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 562 log.Debug("Snapshot sync already completed") 563 return nil 564 } 565 defer func() { // Persist any progress, independent of failure 566 for _, task := range s.tasks { 567 s.forwardAccountTask(task) 568 } 569 s.cleanAccountTasks() 570 s.saveSyncStatus() 571 }() 572 573 log.Debug("Starting snapshot sync cycle", "root", root) 574 defer s.report(true) 575 576 // Whether sync completed or not, disregard any future packets 577 defer func() { 578 log.Debug("Terminating snapshot sync cycle", "root", root) 579 s.lock.Lock() 580 s.accountReqs = make(map[uint64]*accountRequest) 581 s.storageReqs = make(map[uint64]*storageRequest) 582 s.bytecodeReqs = make(map[uint64]*bytecodeRequest) 583 s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest) 584 s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest) 585 s.lock.Unlock() 586 }() 587 // Keep scheduling sync tasks 588 peerJoin := make(chan string, 16) 589 peerJoinSub := s.peerJoin.Subscribe(peerJoin) 590 defer peerJoinSub.Unsubscribe() 591 592 peerDrop := make(chan string, 16) 593 peerDropSub := s.peerDrop.Subscribe(peerDrop) 594 defer peerDropSub.Unsubscribe() 595 596 for { 597 // Remove all completed tasks and terminate sync if everything's done 598 s.cleanStorageTasks() 
599 s.cleanAccountTasks() 600 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 601 return nil 602 } 603 // Assign all the data retrieval tasks to any free peers 604 s.assignAccountTasks(cancel) 605 s.assignBytecodeTasks(cancel) 606 s.assignStorageTasks(cancel) 607 608 if len(s.tasks) == 0 { 609 // Sync phase done, run heal phase 610 s.assignTrienodeHealTasks(cancel) 611 s.assignBytecodeHealTasks(cancel) 612 } 613 // Wait for something to happen 614 select { 615 case <-s.update: 616 // Something happened (new peer, delivery, timeout), recheck tasks 617 case <-peerJoin: 618 // A new peer joined, try to schedule it new tasks 619 case id := <-peerDrop: 620 s.revertRequests(id) 621 case <-cancel: 622 return ErrCancelled 623 624 case req := <-s.accountReqFails: 625 s.revertAccountRequest(req) 626 case req := <-s.bytecodeReqFails: 627 s.revertBytecodeRequest(req) 628 case req := <-s.storageReqFails: 629 s.revertStorageRequest(req) 630 case req := <-s.trienodeHealReqFails: 631 s.revertTrienodeHealRequest(req) 632 case req := <-s.bytecodeHealReqFails: 633 s.revertBytecodeHealRequest(req) 634 635 case res := <-s.accountResps: 636 s.processAccountResponse(res) 637 case res := <-s.bytecodeResps: 638 s.processBytecodeResponse(res) 639 case res := <-s.storageResps: 640 s.processStorageResponse(res) 641 case res := <-s.trienodeHealResps: 642 s.processTrienodeHealResponse(res) 643 case res := <-s.bytecodeHealResps: 644 s.processBytecodeHealResponse(res) 645 } 646 // Report stats if something meaningful happened 647 s.report(false) 648 } 649 } 650 651 // loadSyncStatus retrieves a previously aborted sync status from the database, 652 // or generates a fresh one if none is available. 
653 func (s *Syncer) loadSyncStatus() { 654 var progress syncProgress 655 656 if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { 657 if err := json.Unmarshal(status, &progress); err != nil { 658 log.Error("Failed to decode snap sync status", "err", err) 659 } else { 660 for _, task := range progress.Tasks { 661 log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) 662 } 663 s.tasks = progress.Tasks 664 s.snapped = len(s.tasks) == 0 665 666 s.accountSynced = progress.AccountSynced 667 s.accountBytes = progress.AccountBytes 668 s.bytecodeSynced = progress.BytecodeSynced 669 s.bytecodeBytes = progress.BytecodeBytes 670 s.storageSynced = progress.StorageSynced 671 s.storageBytes = progress.StorageBytes 672 673 s.trienodeHealSynced = progress.TrienodeHealSynced 674 s.trienodeHealBytes = progress.TrienodeHealBytes 675 s.bytecodeHealSynced = progress.BytecodeHealSynced 676 s.bytecodeHealBytes = progress.BytecodeHealBytes 677 return 678 } 679 } 680 // Either we've failed to decode the previus state, or there was none. 681 // Start a fresh sync by chunking up the account range and scheduling 682 // them for retrieval. 
683 s.tasks = nil 684 s.accountSynced, s.accountBytes = 0, 0 685 s.bytecodeSynced, s.bytecodeBytes = 0, 0 686 s.storageSynced, s.storageBytes = 0, 0 687 s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 688 s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 689 690 var next common.Hash 691 step := new(big.Int).Sub( 692 new(big.Int).Div( 693 new(big.Int).Exp(common.Big2, common.Big256, nil), 694 big.NewInt(accountConcurrency), 695 ), common.Big1, 696 ) 697 for i := 0; i < accountConcurrency; i++ { 698 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 699 if i == accountConcurrency-1 { 700 // Make sure we don't overflow if the step is not a proper divisor 701 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 702 } 703 s.tasks = append(s.tasks, &accountTask{ 704 Next: next, 705 Last: last, 706 SubTasks: make(map[common.Hash][]*storageTask), 707 }) 708 log.Debug("Created account sync task", "from", next, "last", last) 709 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 710 } 711 } 712 713 // saveSyncStatus marshals the remaining sync tasks into leveldb. 714 func (s *Syncer) saveSyncStatus() { 715 progress := &syncProgress{ 716 Tasks: s.tasks, 717 AccountSynced: s.accountSynced, 718 AccountBytes: s.accountBytes, 719 BytecodeSynced: s.bytecodeSynced, 720 BytecodeBytes: s.bytecodeBytes, 721 StorageSynced: s.storageSynced, 722 StorageBytes: s.storageBytes, 723 TrienodeHealSynced: s.trienodeHealSynced, 724 TrienodeHealBytes: s.trienodeHealBytes, 725 BytecodeHealSynced: s.bytecodeHealSynced, 726 BytecodeHealBytes: s.bytecodeHealBytes, 727 } 728 status, err := json.Marshal(progress) 729 if err != nil { 730 panic(err) // This can only fail during implementation 731 } 732 rawdb.WriteSnapshotSyncStatus(s.db, status) 733 } 734 735 // cleanAccountTasks removes account range retrieval tasks that have already been 736 // completed. 
737 func (s *Syncer) cleanAccountTasks() { 738 for i := 0; i < len(s.tasks); i++ { 739 if s.tasks[i].done { 740 s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 741 i-- 742 } 743 } 744 if len(s.tasks) == 0 { 745 s.lock.Lock() 746 s.snapped = true 747 s.lock.Unlock() 748 } 749 } 750 751 // cleanStorageTasks iterates over all the account tasks and storage sub-tasks 752 // within, cleaning any that have been completed. 753 func (s *Syncer) cleanStorageTasks() { 754 for _, task := range s.tasks { 755 for account, subtasks := range task.SubTasks { 756 // Remove storage range retrieval tasks that completed 757 for j := 0; j < len(subtasks); j++ { 758 if subtasks[j].done { 759 subtasks = append(subtasks[:j], subtasks[j+1:]...) 760 j-- 761 } 762 } 763 if len(subtasks) > 0 { 764 task.SubTasks[account] = subtasks 765 continue 766 } 767 // If all storage chunks are done, mark the account as done too 768 for j, hash := range task.res.hashes { 769 if hash == account { 770 task.needState[j] = false 771 } 772 } 773 delete(task.SubTasks, account) 774 task.pend-- 775 776 // If this was the last pending task, forward the account task 777 if task.pend == 0 { 778 s.forwardAccountTask(task) 779 } 780 } 781 } 782 } 783 784 // assignAccountTasks attempts to match idle peers to pending account range 785 // retrievals. 786 func (s *Syncer) assignAccountTasks(cancel chan struct{}) { 787 s.lock.Lock() 788 defer s.lock.Unlock() 789 790 // If there are no idle peers, short circuit assignment 791 if len(s.accountIdlers) == 0 { 792 return 793 } 794 // Iterate over all the tasks and try to find a pending one 795 for _, task := range s.tasks { 796 // Skip any tasks already filling 797 if task.req != nil || task.res != nil { 798 continue 799 } 800 // Task pending retrieval, try to find an idle peer. If no such peer 801 // exists, we probably assigned tasks for all (or they are stateless). 802 // Abort the entire assignment mechanism. 
803 var idle string 804 for id := range s.accountIdlers { 805 // If the peer rejected a query in this sync cycle, don't bother asking 806 // again for anything, it's either out of sync or already pruned 807 if _, ok := s.statelessPeers[id]; ok { 808 continue 809 } 810 idle = id 811 break 812 } 813 if idle == "" { 814 return 815 } 816 // Matched a pending task to an idle peer, allocate a unique request id 817 var reqid uint64 818 for { 819 reqid = uint64(rand.Int63()) 820 if reqid == 0 { 821 continue 822 } 823 if _, ok := s.accountReqs[reqid]; ok { 824 continue 825 } 826 break 827 } 828 // Generate the network query and send it to the peer 829 req := &accountRequest{ 830 peer: idle, 831 id: reqid, 832 cancel: cancel, 833 stale: make(chan struct{}), 834 origin: task.Next, 835 limit: task.Last, 836 task: task, 837 } 838 req.timeout = time.AfterFunc(requestTimeout, func() { 839 log.Debug("Account range request timed out") 840 s.scheduleRevertAccountRequest(req) 841 }) 842 s.accountReqs[reqid] = req 843 delete(s.accountIdlers, idle) 844 845 s.pend.Add(1) 846 go func(peer SyncPeer, root common.Hash) { 847 defer s.pend.Done() 848 849 // Attempt to send the remote request and revert if it fails 850 if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil { 851 peer.Log().Debug("Failed to request account range", "err", err) 852 s.scheduleRevertAccountRequest(req) 853 } 854 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 855 856 // Inject the request into the task to block further assignments 857 task.req = req 858 } 859 } 860 861 // assignBytecodeTasks attempts to match idle peers to pending code retrievals. 
862 func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { 863 s.lock.Lock() 864 defer s.lock.Unlock() 865 866 // If there are no idle peers, short circuit assignment 867 if len(s.bytecodeIdlers) == 0 { 868 return 869 } 870 // Iterate over all the tasks and try to find a pending one 871 for _, task := range s.tasks { 872 // Skip any tasks not in the bytecode retrieval phase 873 if task.res == nil { 874 continue 875 } 876 // Skip tasks that are already retrieving (or done with) all codes 877 if len(task.codeTasks) == 0 { 878 continue 879 } 880 // Task pending retrieval, try to find an idle peer. If no such peer 881 // exists, we probably assigned tasks for all (or they are stateless). 882 // Abort the entire assignment mechanism. 883 var idle string 884 for id := range s.bytecodeIdlers { 885 // If the peer rejected a query in this sync cycle, don't bother asking 886 // again for anything, it's either out of sync or already pruned 887 if _, ok := s.statelessPeers[id]; ok { 888 continue 889 } 890 idle = id 891 break 892 } 893 if idle == "" { 894 return 895 } 896 // Matched a pending task to an idle peer, allocate a unique request id 897 var reqid uint64 898 for { 899 reqid = uint64(rand.Int63()) 900 if reqid == 0 { 901 continue 902 } 903 if _, ok := s.bytecodeReqs[reqid]; ok { 904 continue 905 } 906 break 907 } 908 // Generate the network query and send it to the peer 909 hashes := make([]common.Hash, 0, maxCodeRequestCount) 910 for hash := range task.codeTasks { 911 delete(task.codeTasks, hash) 912 hashes = append(hashes, hash) 913 if len(hashes) >= maxCodeRequestCount { 914 break 915 } 916 } 917 req := &bytecodeRequest{ 918 peer: idle, 919 id: reqid, 920 cancel: cancel, 921 stale: make(chan struct{}), 922 hashes: hashes, 923 task: task, 924 } 925 req.timeout = time.AfterFunc(requestTimeout, func() { 926 log.Debug("Bytecode request timed out") 927 s.scheduleRevertBytecodeRequest(req) 928 }) 929 s.bytecodeReqs[reqid] = req 930 delete(s.bytecodeIdlers, idle) 931 
932 s.pend.Add(1) 933 go func(peer SyncPeer) { 934 defer s.pend.Done() 935 936 // Attempt to send the remote request and revert if it fails 937 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 938 log.Debug("Failed to request bytecodes", "err", err) 939 s.scheduleRevertBytecodeRequest(req) 940 } 941 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 942 } 943 } 944 945 // assignStorageTasks attempts to match idle peers to pending storage range 946 // retrievals. 947 func (s *Syncer) assignStorageTasks(cancel chan struct{}) { 948 s.lock.Lock() 949 defer s.lock.Unlock() 950 951 // If there are no idle peers, short circuit assignment 952 if len(s.storageIdlers) == 0 { 953 return 954 } 955 // Iterate over all the tasks and try to find a pending one 956 for _, task := range s.tasks { 957 // Skip any tasks not in the storage retrieval phase 958 if task.res == nil { 959 continue 960 } 961 // Skip tasks that are already retrieving (or done with) all small states 962 if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { 963 continue 964 } 965 // Task pending retrieval, try to find an idle peer. If no such peer 966 // exists, we probably assigned tasks for all (or they are stateless). 967 // Abort the entire assignment mechanism. 968 var idle string 969 for id := range s.storageIdlers { 970 // If the peer rejected a query in this sync cycle, don't bother asking 971 // again for anything, it's either out of sync or already pruned 972 if _, ok := s.statelessPeers[id]; ok { 973 continue 974 } 975 idle = id 976 break 977 } 978 if idle == "" { 979 return 980 } 981 // Matched a pending task to an idle peer, allocate a unique request id 982 var reqid uint64 983 for { 984 reqid = uint64(rand.Int63()) 985 if reqid == 0 { 986 continue 987 } 988 if _, ok := s.storageReqs[reqid]; ok { 989 continue 990 } 991 break 992 } 993 // Generate the network query and send it to the peer. 
If there are 994 // large contract tasks pending, complete those before diving into 995 // even more new contracts. 996 var ( 997 accounts = make([]common.Hash, 0, maxStorageSetRequestCount) 998 roots = make([]common.Hash, 0, maxStorageSetRequestCount) 999 subtask *storageTask 1000 ) 1001 for account, subtasks := range task.SubTasks { 1002 for _, st := range subtasks { 1003 // Skip any subtasks already filling 1004 if st.req != nil { 1005 continue 1006 } 1007 // Found an incomplete storage chunk, schedule it 1008 accounts = append(accounts, account) 1009 roots = append(roots, st.root) 1010 subtask = st 1011 break // Large contract chunks are downloaded individually 1012 } 1013 if subtask != nil { 1014 break // Large contract chunks are downloaded individually 1015 } 1016 } 1017 if subtask == nil { 1018 // No large contract required retrieval, but small ones available 1019 for acccount, root := range task.stateTasks { 1020 delete(task.stateTasks, acccount) 1021 1022 accounts = append(accounts, acccount) 1023 roots = append(roots, root) 1024 1025 if len(accounts) >= maxStorageSetRequestCount { 1026 break 1027 } 1028 } 1029 } 1030 // If nothing was found, it means this task is actually already fully 1031 // retrieving, but large contracts are hard to detect. Skip to the next. 
1032 if len(accounts) == 0 { 1033 continue 1034 } 1035 req := &storageRequest{ 1036 peer: idle, 1037 id: reqid, 1038 cancel: cancel, 1039 stale: make(chan struct{}), 1040 accounts: accounts, 1041 roots: roots, 1042 mainTask: task, 1043 subTask: subtask, 1044 } 1045 if subtask != nil { 1046 req.origin = subtask.Next 1047 req.limit = subtask.Last 1048 } 1049 req.timeout = time.AfterFunc(requestTimeout, func() { 1050 log.Debug("Storage request timed out") 1051 s.scheduleRevertStorageRequest(req) 1052 }) 1053 s.storageReqs[reqid] = req 1054 delete(s.storageIdlers, idle) 1055 1056 s.pend.Add(1) 1057 go func(peer SyncPeer, root common.Hash) { 1058 defer s.pend.Done() 1059 1060 // Attempt to send the remote request and revert if it fails 1061 var origin, limit []byte 1062 if subtask != nil { 1063 origin, limit = req.origin[:], req.limit[:] 1064 } 1065 if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil { 1066 log.Debug("Failed to request storage", "err", err) 1067 s.scheduleRevertStorageRequest(req) 1068 } 1069 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1070 1071 // Inject the request into the subtask to block further assignments 1072 if subtask != nil { 1073 subtask.req = req 1074 } 1075 } 1076 } 1077 1078 // assignTrienodeHealTasks attempts to match idle peers to trie node requests to 1079 // heal any trie errors caused by the snap sync's chunked retrieval model. 1080 func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { 1081 s.lock.Lock() 1082 defer s.lock.Unlock() 1083 1084 // If there are no idle peers, short circuit assignment 1085 if len(s.trienodeHealIdlers) == 0 { 1086 return 1087 } 1088 // Iterate over pending tasks and try to find a peer to retrieve with 1089 for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1090 // If there are not enough trie tasks queued to fully assign, fill the 1091 // queue from the state sync scheduler. 
The trie synced schedules these 1092 // together with bytecodes, so we need to queue them combined. 1093 var ( 1094 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1095 want = maxTrieRequestCount + maxCodeRequestCount 1096 ) 1097 if have < want { 1098 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1099 for i, hash := range nodes { 1100 s.healer.trieTasks[hash] = paths[i] 1101 } 1102 for _, hash := range codes { 1103 s.healer.codeTasks[hash] = struct{}{} 1104 } 1105 } 1106 // If all the heal tasks are bytecodes or already downloading, bail 1107 if len(s.healer.trieTasks) == 0 { 1108 return 1109 } 1110 // Task pending retrieval, try to find an idle peer. If no such peer 1111 // exists, we probably assigned tasks for all (or they are stateless). 1112 // Abort the entire assignment mechanism. 1113 var idle string 1114 for id := range s.trienodeHealIdlers { 1115 // If the peer rejected a query in this sync cycle, don't bother asking 1116 // again for anything, it's either out of sync or already pruned 1117 if _, ok := s.statelessPeers[id]; ok { 1118 continue 1119 } 1120 idle = id 1121 break 1122 } 1123 if idle == "" { 1124 return 1125 } 1126 // Matched a pending task to an idle peer, allocate a unique request id 1127 var reqid uint64 1128 for { 1129 reqid = uint64(rand.Int63()) 1130 if reqid == 0 { 1131 continue 1132 } 1133 if _, ok := s.trienodeHealReqs[reqid]; ok { 1134 continue 1135 } 1136 break 1137 } 1138 // Generate the network query and send it to the peer 1139 var ( 1140 hashes = make([]common.Hash, 0, maxTrieRequestCount) 1141 paths = make([]trie.SyncPath, 0, maxTrieRequestCount) 1142 pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount) 1143 ) 1144 for hash, pathset := range s.healer.trieTasks { 1145 delete(s.healer.trieTasks, hash) 1146 1147 hashes = append(hashes, hash) 1148 paths = append(paths, pathset) 1149 pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash 1150 1151 if 
len(hashes) >= maxTrieRequestCount { 1152 break 1153 } 1154 } 1155 req := &trienodeHealRequest{ 1156 peer: idle, 1157 id: reqid, 1158 cancel: cancel, 1159 stale: make(chan struct{}), 1160 hashes: hashes, 1161 paths: paths, 1162 task: s.healer, 1163 } 1164 req.timeout = time.AfterFunc(requestTimeout, func() { 1165 log.Debug("Trienode heal request timed out") 1166 s.scheduleRevertTrienodeHealRequest(req) 1167 }) 1168 s.trienodeHealReqs[reqid] = req 1169 delete(s.trienodeHealIdlers, idle) 1170 1171 s.pend.Add(1) 1172 go func(peer SyncPeer, root common.Hash) { 1173 defer s.pend.Done() 1174 1175 // Attempt to send the remote request and revert if it fails 1176 if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil { 1177 log.Debug("Failed to request trienode healers", "err", err) 1178 s.scheduleRevertTrienodeHealRequest(req) 1179 } 1180 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1181 } 1182 } 1183 1184 // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to 1185 // heal any trie errors caused by the snap sync's chunked retrieval model. 1186 func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { 1187 s.lock.Lock() 1188 defer s.lock.Unlock() 1189 1190 // If there are no idle peers, short circuit assignment 1191 if len(s.bytecodeHealIdlers) == 0 { 1192 return 1193 } 1194 // Iterate over pending tasks and try to find a peer to retrieve with 1195 for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1196 // If there are not enough trie tasks queued to fully assign, fill the 1197 // queue from the state sync scheduler. The trie synced schedules these 1198 // together with trie nodes, so we need to queue them combined. 
1199 var ( 1200 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1201 want = maxTrieRequestCount + maxCodeRequestCount 1202 ) 1203 if have < want { 1204 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1205 for i, hash := range nodes { 1206 s.healer.trieTasks[hash] = paths[i] 1207 } 1208 for _, hash := range codes { 1209 s.healer.codeTasks[hash] = struct{}{} 1210 } 1211 } 1212 // If all the heal tasks are trienodes or already downloading, bail 1213 if len(s.healer.codeTasks) == 0 { 1214 return 1215 } 1216 // Task pending retrieval, try to find an idle peer. If no such peer 1217 // exists, we probably assigned tasks for all (or they are stateless). 1218 // Abort the entire assignment mechanism. 1219 var idle string 1220 for id := range s.bytecodeHealIdlers { 1221 // If the peer rejected a query in this sync cycle, don't bother asking 1222 // again for anything, it's either out of sync or already pruned 1223 if _, ok := s.statelessPeers[id]; ok { 1224 continue 1225 } 1226 idle = id 1227 break 1228 } 1229 if idle == "" { 1230 return 1231 } 1232 // Matched a pending task to an idle peer, allocate a unique request id 1233 var reqid uint64 1234 for { 1235 reqid = uint64(rand.Int63()) 1236 if reqid == 0 { 1237 continue 1238 } 1239 if _, ok := s.bytecodeHealReqs[reqid]; ok { 1240 continue 1241 } 1242 break 1243 } 1244 // Generate the network query and send it to the peer 1245 hashes := make([]common.Hash, 0, maxCodeRequestCount) 1246 for hash := range s.healer.codeTasks { 1247 delete(s.healer.codeTasks, hash) 1248 1249 hashes = append(hashes, hash) 1250 if len(hashes) >= maxCodeRequestCount { 1251 break 1252 } 1253 } 1254 req := &bytecodeHealRequest{ 1255 peer: idle, 1256 id: reqid, 1257 cancel: cancel, 1258 stale: make(chan struct{}), 1259 hashes: hashes, 1260 task: s.healer, 1261 } 1262 req.timeout = time.AfterFunc(requestTimeout, func() { 1263 log.Debug("Bytecode heal request timed out") 1264 s.scheduleRevertBytecodeHealRequest(req) 1265 }) 1266 
s.bytecodeHealReqs[reqid] = req 1267 delete(s.bytecodeHealIdlers, idle) 1268 1269 s.pend.Add(1) 1270 go func(peer SyncPeer) { 1271 defer s.pend.Done() 1272 1273 // Attempt to send the remote request and revert if it fails 1274 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 1275 log.Debug("Failed to request bytecode healers", "err", err) 1276 s.scheduleRevertBytecodeHealRequest(req) 1277 } 1278 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 1279 } 1280 } 1281 1282 // revertRequests locates all the currently pending reuqests from a particular 1283 // peer and reverts them, rescheduling for others to fulfill. 1284 func (s *Syncer) revertRequests(peer string) { 1285 // Gather the requests first, revertals need the lock too 1286 s.lock.Lock() 1287 var accountReqs []*accountRequest 1288 for _, req := range s.accountReqs { 1289 if req.peer == peer { 1290 accountReqs = append(accountReqs, req) 1291 } 1292 } 1293 var bytecodeReqs []*bytecodeRequest 1294 for _, req := range s.bytecodeReqs { 1295 if req.peer == peer { 1296 bytecodeReqs = append(bytecodeReqs, req) 1297 } 1298 } 1299 var storageReqs []*storageRequest 1300 for _, req := range s.storageReqs { 1301 if req.peer == peer { 1302 storageReqs = append(storageReqs, req) 1303 } 1304 } 1305 var trienodeHealReqs []*trienodeHealRequest 1306 for _, req := range s.trienodeHealReqs { 1307 if req.peer == peer { 1308 trienodeHealReqs = append(trienodeHealReqs, req) 1309 } 1310 } 1311 var bytecodeHealReqs []*bytecodeHealRequest 1312 for _, req := range s.bytecodeHealReqs { 1313 if req.peer == peer { 1314 bytecodeHealReqs = append(bytecodeHealReqs, req) 1315 } 1316 } 1317 s.lock.Unlock() 1318 1319 // Revert all the requests matching the peer 1320 for _, req := range accountReqs { 1321 s.revertAccountRequest(req) 1322 } 1323 for _, req := range bytecodeReqs { 1324 s.revertBytecodeRequest(req) 1325 } 1326 for _, req := range storageReqs { 1327 s.revertStorageRequest(req) 1328 } 1329 for 
_, req := range trienodeHealReqs { 1330 s.revertTrienodeHealRequest(req) 1331 } 1332 for _, req := range bytecodeHealReqs { 1333 s.revertBytecodeHealRequest(req) 1334 } 1335 } 1336 1337 // scheduleRevertAccountRequest asks the event loop to clean up an account range 1338 // request and return all failed retrieval tasks to the scheduler for reassignment. 1339 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { 1340 select { 1341 case s.accountReqFails <- req: 1342 // Sync event loop notified 1343 case <-req.cancel: 1344 // Sync cycle got cancelled 1345 case <-req.stale: 1346 // Request already reverted 1347 } 1348 } 1349 1350 // revertAccountRequest cleans up an account range request and returns all failed 1351 // retrieval tasks to the scheduler for reassignment. 1352 // 1353 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1354 // On peer threads, use scheduleRevertAccountRequest. 1355 func (s *Syncer) revertAccountRequest(req *accountRequest) { 1356 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) 1357 select { 1358 case <-req.stale: 1359 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id) 1360 return 1361 default: 1362 } 1363 close(req.stale) 1364 1365 // Remove the request from the tracked set 1366 s.lock.Lock() 1367 delete(s.accountReqs, req.id) 1368 s.lock.Unlock() 1369 1370 // If there's a timeout timer still running, abort it and mark the account 1371 // task as not-pending, ready for resheduling 1372 req.timeout.Stop() 1373 if req.task.req == req { 1374 req.task.req = nil 1375 } 1376 } 1377 1378 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request 1379 // and return all failed retrieval tasks to the scheduler for reassignment. 
1380 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1381 select { 1382 case s.bytecodeReqFails <- req: 1383 // Sync event loop notified 1384 case <-req.cancel: 1385 // Sync cycle got cancelled 1386 case <-req.stale: 1387 // Request already reverted 1388 } 1389 } 1390 1391 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1392 // retrieval tasks to the scheduler for reassignment. 1393 // 1394 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1395 // On peer threads, use scheduleRevertBytecodeRequest. 1396 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1397 log.Debug("Reverting bytecode request", "peer", req.peer) 1398 select { 1399 case <-req.stale: 1400 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1401 return 1402 default: 1403 } 1404 close(req.stale) 1405 1406 // Remove the request from the tracked set 1407 s.lock.Lock() 1408 delete(s.bytecodeReqs, req.id) 1409 s.lock.Unlock() 1410 1411 // If there's a timeout timer still running, abort it and mark the code 1412 // retrievals as not-pending, ready for resheduling 1413 req.timeout.Stop() 1414 for _, hash := range req.hashes { 1415 req.task.codeTasks[hash] = struct{}{} 1416 } 1417 } 1418 1419 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1420 // request and return all failed retrieval tasks to the scheduler for reassignment. 1421 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1422 select { 1423 case s.storageReqFails <- req: 1424 // Sync event loop notified 1425 case <-req.cancel: 1426 // Sync cycle got cancelled 1427 case <-req.stale: 1428 // Request already reverted 1429 } 1430 } 1431 1432 // revertStorageRequest cleans up a storage range request and returns all failed 1433 // retrieval tasks to the scheduler for reassignment. 1434 // 1435 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 
1436 // On peer threads, use scheduleRevertStorageRequest. 1437 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1438 log.Debug("Reverting storage request", "peer", req.peer) 1439 select { 1440 case <-req.stale: 1441 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1442 return 1443 default: 1444 } 1445 close(req.stale) 1446 1447 // Remove the request from the tracked set 1448 s.lock.Lock() 1449 delete(s.storageReqs, req.id) 1450 s.lock.Unlock() 1451 1452 // If there's a timeout timer still running, abort it and mark the storage 1453 // task as not-pending, ready for resheduling 1454 req.timeout.Stop() 1455 if req.subTask != nil { 1456 req.subTask.req = nil 1457 } else { 1458 for i, account := range req.accounts { 1459 req.mainTask.stateTasks[account] = req.roots[i] 1460 } 1461 } 1462 } 1463 1464 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1465 // request and return all failed retrieval tasks to the scheduler for reassignment. 1466 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1467 select { 1468 case s.trienodeHealReqFails <- req: 1469 // Sync event loop notified 1470 case <-req.cancel: 1471 // Sync cycle got cancelled 1472 case <-req.stale: 1473 // Request already reverted 1474 } 1475 } 1476 1477 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1478 // failed retrieval tasks to the scheduler for reassignment. 1479 // 1480 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1481 // On peer threads, use scheduleRevertTrienodeHealRequest. 
1482 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { 1483 log.Debug("Reverting trienode heal request", "peer", req.peer) 1484 select { 1485 case <-req.stale: 1486 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) 1487 return 1488 default: 1489 } 1490 close(req.stale) 1491 1492 // Remove the request from the tracked set 1493 s.lock.Lock() 1494 delete(s.trienodeHealReqs, req.id) 1495 s.lock.Unlock() 1496 1497 // If there's a timeout timer still running, abort it and mark the trie node 1498 // retrievals as not-pending, ready for resheduling 1499 req.timeout.Stop() 1500 for i, hash := range req.hashes { 1501 req.task.trieTasks[hash] = req.paths[i] 1502 } 1503 } 1504 1505 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal 1506 // request and return all failed retrieval tasks to the scheduler for reassignment. 1507 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { 1508 select { 1509 case s.bytecodeHealReqFails <- req: 1510 // Sync event loop notified 1511 case <-req.cancel: 1512 // Sync cycle got cancelled 1513 case <-req.stale: 1514 // Request already reverted 1515 } 1516 } 1517 1518 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all 1519 // failed retrieval tasks to the scheduler for reassignment. 1520 // 1521 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1522 // On peer threads, use scheduleRevertBytecodeHealRequest. 
1523 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { 1524 log.Debug("Reverting bytecode heal request", "peer", req.peer) 1525 select { 1526 case <-req.stale: 1527 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) 1528 return 1529 default: 1530 } 1531 close(req.stale) 1532 1533 // Remove the request from the tracked set 1534 s.lock.Lock() 1535 delete(s.bytecodeHealReqs, req.id) 1536 s.lock.Unlock() 1537 1538 // If there's a timeout timer still running, abort it and mark the code 1539 // retrievals as not-pending, ready for resheduling 1540 req.timeout.Stop() 1541 for _, hash := range req.hashes { 1542 req.task.codeTasks[hash] = struct{}{} 1543 } 1544 } 1545 1546 // processAccountResponse integrates an already validated account range response 1547 // into the account tasks. 1548 func (s *Syncer) processAccountResponse(res *accountResponse) { 1549 // Switch the task from pending to filling 1550 res.task.req = nil 1551 res.task.res = res 1552 1553 // Ensure that the response doesn't overflow into the subsequent task 1554 last := res.task.Last.Big() 1555 for i, hash := range res.hashes { 1556 if hash.Big().Cmp(last) > 0 { 1557 // Chunk overflown, cut off excess, but also update the boundary nodes 1558 for j := i; j < len(res.hashes); j++ { 1559 if err := res.trie.Prove(res.hashes[j][:], 0, res.overflow); err != nil { 1560 panic(err) // Account range was already proven, what happened 1561 } 1562 } 1563 res.hashes = res.hashes[:i] 1564 res.accounts = res.accounts[:i] 1565 res.cont = false // Mark range completed 1566 break 1567 } 1568 } 1569 // Iterate over all the accounts and assemble which ones need further sub- 1570 // filling before the entire account range can be persisted. 
1571 res.task.needCode = make([]bool, len(res.accounts)) 1572 res.task.needState = make([]bool, len(res.accounts)) 1573 res.task.needHeal = make([]bool, len(res.accounts)) 1574 1575 res.task.codeTasks = make(map[common.Hash]struct{}) 1576 res.task.stateTasks = make(map[common.Hash]common.Hash) 1577 1578 resumed := make(map[common.Hash]struct{}) 1579 1580 res.task.pend = 0 1581 for i, account := range res.accounts { 1582 // Check if the account is a contract with an unknown code 1583 if !bytes.Equal(account.CodeHash, emptyCode[:]) { 1584 if code := rawdb.ReadCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)); code == nil { 1585 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} 1586 res.task.needCode[i] = true 1587 res.task.pend++ 1588 } 1589 } 1590 // Check if the account is a contract with an unknown storage trie 1591 if account.Root != emptyRoot { 1592 if node, err := s.db.Get(account.Root[:]); err != nil || node == nil { 1593 // If there was a previous large state retrieval in progress, 1594 // don't restart it from scratch. This happens if a sync cycle 1595 // is interrupted and resumed later. However, *do* update the 1596 // previous root hash. 1597 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok { 1598 log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root) 1599 for _, subtask := range subtasks { 1600 subtask.root = account.Root 1601 } 1602 res.task.needHeal[i] = true 1603 resumed[res.hashes[i]] = struct{}{} 1604 } else { 1605 res.task.stateTasks[res.hashes[i]] = account.Root 1606 } 1607 res.task.needState[i] = true 1608 res.task.pend++ 1609 } 1610 } 1611 } 1612 // Delete any subtasks that have been aborted but not resumed. This may undo 1613 // some progress if a new peer gives us less accounts than an old one, but for 1614 // now we have to live with that. 
for hash := range res.task.SubTasks {
		if _, ok := resumed[hash]; !ok {
			log.Debug("Aborting suspended storage retrieval", "account", hash)
			delete(res.task.SubTasks, hash)
		}
	}
	// If the account range contained no contracts, or all have been fully filled
	// beforehand, short circuit storage filling and forward to the next task
	if res.task.pend == 0 {
		s.forwardAccountTask(res.task)
		return
	}
	// Some accounts are incomplete, leave as is for the storage and contract
	// task assigners to pick up and fill.
}

// processBytecodeResponse integrates an already validated bytecode response
// into the account tasks.
//
// Hashes whose bytecode is nil in the response are put back into the task's
// codeTasks set. Every delivered bytecode clears the needCode flag of all
// accounts in the pending account response that reference its hash, is counted
// in the sync statistics, written into a database batch and added to the bloom.
// If no work remains pending on the account task afterwards, the task is
// forwarded via forwardAccountTask.
func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
	batch := s.db.NewBatch()

	var (
		codes uint64
		bytes common.StorageSize // shadows the bytes package within this function
	)
	for i, hash := range res.hashes {
		code := res.codes[i]

		// If the bytecode was not delivered, reschedule it
		if code == nil {
			res.task.codeTasks[hash] = struct{}{}
			continue
		}
		// Code was delivered, mark it not needed any more. Several accounts may
		// share the same code hash, so every match is cleared.
		for j, account := range res.task.res.accounts {
			if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) {
				res.task.needCode[j] = false
				res.task.pend--
			}
		}
		// Push the bytecode into a database batch
		s.bytecodeSynced++
		s.bytecodeBytes += common.StorageSize(len(code))

		codes++
		bytes += common.StorageSize(len(code))

		rawdb.WriteCode(batch, hash, code)
		s.bloom.Add(hash[:])
	}
	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist bytecodes", "err", err)
	}
	log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)

	// If this delivery completed the last pending task, forward the account task
	// to the next chunk
	if res.task.pend == 0 {
		s.forwardAccountTask(res.task)
		return
	}
	// Some accounts are still incomplete, leave as is for the storage and contract
	// task assigners to pick up and fill.
}

// processStorageResponse integrates an already validated storage response
// into the account tasks.
//
// Requested accounts beyond the delivered hash sets are put back into the main
// task's stateTasks. For delivered accounts, the needState / needHeal flags of
// the main task are updated, a chunked last contract is split into
// storageConcurrency sub-tasks on first sight, and an active sub-task is either
// advanced or marked done. The reconstructed trie nodes of every delivered
// account are then written to the database, skipping boundary nodes for the
// last delivered account. If no work remains pending on the main task
// afterwards, it is forwarded via forwardAccountTask.
func (s *Syncer) processStorageResponse(res *storageResponse) {
	// Switch the subtask from pending to idle
	if res.subTask != nil {
		res.subTask.req = nil
	}
	batch := s.db.NewBatch()

	var (
		slots   int
		nodes   int
		skipped int
		bytes   common.StorageSize // shadows the bytes package within this function
	)
	// Iterate over all the accounts and reconstruct their storage tries from the
	// delivered slots
	for i, account := range res.accounts {
		// If the account was not delivered, reschedule it
		if i >= len(res.hashes) {
			res.mainTask.stateTasks[account] = res.roots[i]
			continue
		}
		// State was delivered, if complete mark as not needed any more, otherwise
		// mark the account as needing healing
		for j, hash := range res.mainTask.res.hashes {
			if account != hash {
				continue
			}
			acc := res.mainTask.res.accounts[j]

			// If the packet contains multiple contract storage slots, all
			// but the last are surely complete. The last contract may be
			// chunked, so check its continuation flag.
			if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
				res.mainTask.needState[j] = false
				res.mainTask.pend--
			}
			// If the last contract was chunked, mark it as needing healing
			// to avoid writing it out to disk prematurely.
			if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont {
				res.mainTask.needHeal[j] = true
			}
			// If the last contract was chunked, we need to switch to large
			// contract handling mode
			if res.subTask == nil && i == len(res.hashes)-1 && res.cont {
				// If we haven't yet started a large-contract retrieval, create
				// the subtasks for it within the main account task
				if tasks, ok := res.mainTask.SubTasks[account]; !ok {
					var (
						next common.Hash
					)
					// step is (2^256 / storageConcurrency) - 1, i.e. the distance
					// between the first and last hash of one sub-range
					step := new(big.Int).Sub(
						new(big.Int).Div(
							new(big.Int).Exp(common.Big2, common.Big256, nil),
							big.NewInt(storageConcurrency),
						), common.Big1,
					)
					for k := 0; k < storageConcurrency; k++ {
						last := common.BigToHash(new(big.Int).Add(next.Big(), step))
						if k == storageConcurrency-1 {
							// Make sure we don't overflow if the step is not a proper divisor
							last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
						}
						tasks = append(tasks, &storageTask{
							Next: next,
							Last: last,
							root: acc.Root,
						})
						log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last)
						next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
					}
					res.mainTask.SubTasks[account] = tasks

					// Since we've just created the sub-tasks, this response
					// is surely for the first one (zero origin)
					res.subTask = tasks[0]
				}
			}
			// If we're in large contract delivery mode, forward the subtask
			if res.subTask != nil {
				// Ensure the response doesn't overflow into the subsequent task
				last := res.subTask.Last.Big()
				for k, hash := range res.hashes[i] {
					if hash.Big().Cmp(last) > 0 {
						// Chunk overflown, cut off excess, but also update the boundary
						// by recording the proof paths of the dropped slots as overflow
						for l := k; l < len(res.hashes[i]); l++ {
							if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil {
								panic(err) // Storage range was already proven, what happened
							}
						}
						res.hashes[i] = res.hashes[i][:k]
						res.slots[i] = res.slots[i][:k]
						res.cont = false // Mark range completed
						break
					}
				}
				// Forward the relevant storage chunk (even if created just now)
				if res.cont {
					// Resume right after the last delivered slot hash
					res.subTask.Next = common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1)))
				} else {
					res.subTask.done = true
				}
			}
		}
		// Iterate over all the reconstructed trie nodes and push them to disk
		slots += len(res.hashes[i])

		it := res.nodes[i].NewIterator(nil, nil)
		for it.Next() {
			// Boundary nodes are not written for the last result, since they are incomplete
			if i == len(res.hashes)-1 {
				if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok {
					skipped++
					continue
				}
			}
			// Node is not a boundary, persist to disk
			batch.Put(it.Key(), it.Value())
			s.bloom.Add(it.Key())

			bytes += common.StorageSize(common.HashLength + len(it.Value()))
			nodes++
		}
		it.Release()
	}
	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist storage slots", "err", err)
	}
	s.storageSynced += uint64(slots)
	s.storageBytes += bytes

	log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "nodes", nodes, "skipped", skipped, "bytes", bytes)

	// If this delivery completed the last pending task, forward the account task
	// to the next chunk
	if res.mainTask.pend == 0 {
		s.forwardAccountTask(res.mainTask)
		return
	}
	// Some accounts are still incomplete, leave as is for the storage and contract
	// task assigners to pick up and fill.
}

// processTrienodeHealResponse integrates an already validated trienode response
// into the healer tasks.
1826 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { 1827 for i, hash := range res.hashes { 1828 node := res.nodes[i] 1829 1830 // If the trie node was not delivered, reschedule it 1831 if node == nil { 1832 res.task.trieTasks[hash] = res.paths[i] 1833 continue 1834 } 1835 // Push the trie node into the state syncer 1836 s.trienodeHealSynced++ 1837 s.trienodeHealBytes += common.StorageSize(len(node)) 1838 1839 err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) 1840 switch err { 1841 case nil: 1842 case trie.ErrAlreadyProcessed: 1843 s.trienodeHealDups++ 1844 case trie.ErrNotRequested: 1845 s.trienodeHealNops++ 1846 default: 1847 log.Error("Invalid trienode processed", "hash", hash, "err", err) 1848 } 1849 } 1850 batch := s.db.NewBatch() 1851 if err := s.healer.scheduler.Commit(batch); err != nil { 1852 log.Error("Failed to commit healing data", "err", err) 1853 } 1854 if err := batch.Write(); err != nil { 1855 log.Crit("Failed to persist healing data", "err", err) 1856 } 1857 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) 1858 } 1859 1860 // processBytecodeHealResponse integrates an already validated bytecode response 1861 // into the healer tasks. 
func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
	for i, hash := range res.hashes {
		node := res.codes[i]

		// If the bytecode was not delivered, reschedule it
		if node == nil {
			res.task.codeTasks[hash] = struct{}{}
			continue
		}
		// Push the bytecode into the state syncer
		s.bytecodeHealSynced++
		s.bytecodeHealBytes += common.StorageSize(len(node))

		err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node})
		switch err {
		case nil:
		case trie.ErrAlreadyProcessed:
			s.bytecodeHealDups++
		case trie.ErrNotRequested:
			s.bytecodeHealNops++
		default:
			log.Error("Invalid bytecode processed", "hash", hash, "err", err)
		}
	}
	// Flush whatever the scheduler has ready into the database. A commit
	// failure is only logged, a failed disk write is critical.
	batch := s.db.NewBatch()
	if err := s.healer.scheduler.Commit(batch); err != nil {
		log.Error("Failed to commit healing data", "err", err)
	}
	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist healing data", "err", err)
	}
	log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
}

// forwardAccountTask takes a filled account task and persists anything available
// into the database, after which it forwards the next account marker so that the
// task's next chunk may be filled.
//
// The pending response is detached from the task first; a task without a
// pending response is left untouched. The task is marked done only if every
// account of the response is complete and the response had no continuation.
func (s *Syncer) forwardAccountTask(task *accountTask) {
	// Remove any pending delivery
	res := task.res
	if res == nil {
		return // nothing to forward
	}
	task.res = nil

	// Iterate over all the accounts and gather all the incomplete trie nodes. A
	// node is incomplete if we haven't yet filled it (sync was interrupted), or
	// if we filled it in multiple chunks (storage trie), in which case the few
	// nodes on the chunk boundaries are missing.
	incompletes := light.NewNodeSet()
	for i := range res.accounts {
		// If the filling was interrupted, mark everything after as incomplete
		if task.needCode[i] || task.needState[i] {
			for j := i; j < len(res.accounts); j++ {
				if err := res.trie.Prove(res.hashes[j][:], 0, incompletes); err != nil {
					panic(err) // Account range was already proven, what happened
				}
			}
			break
		}
		// Filling not interrupted until this point, mark incomplete if needs healing
		if task.needHeal[i] {
			if err := res.trie.Prove(res.hashes[i][:], 0, incompletes); err != nil {
				panic(err) // Account range was already proven, what happened
			}
		}
	}
	// Persist every finalized trie node that's not on the boundary
	batch := s.db.NewBatch()

	var (
		nodes   int
		skipped int
		bytes   common.StorageSize // shadows the bytes package within this function
	)
	it := res.nodes.NewIterator(nil, nil)
	for it.Next() {
		// Boundary nodes are not written, since they are incomplete
		if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok {
			skipped++
			continue
		}
		// Overflow nodes are not written, since they mess with another task
		if _, err := res.overflow.Get(it.Key()); err == nil {
			skipped++
			continue
		}
		// Accounts with split storage requests are incomplete
		if _, err := incompletes.Get(it.Key()); err == nil {
			skipped++
			continue
		}
		// Node is neither a boundary, nor an incomplete account, persist to disk
		batch.Put(it.Key(), it.Value())
		s.bloom.Add(it.Key())

		bytes += common.StorageSize(common.HashLength + len(it.Value()))
		nodes++
	}
	it.Release()

	if err := batch.Write(); err != nil {
		log.Crit("Failed to persist accounts", "err", err)
	}
	s.accountBytes += bytes
	s.accountSynced += uint64(len(res.accounts))

	log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "nodes", nodes, "skipped", skipped, "bytes", bytes)

	// Task filling persisted, push the chunk marker forward to the first
	// account still missing data.
	for i, hash := range res.hashes {
		if task.needCode[i] || task.needState[i] {
			return
		}
		task.Next = common.BigToHash(new(big.Int).Add(hash.Big(), big.NewInt(1)))
	}
	// All accounts marked as complete, track if the entire task is done
	task.done = !res.cont
}

// OnAccounts is a callback method to invoke when a range of accounts are
// received from a remote peer.
//
// It returns nil when the packet matches no tracked request, when the request
// already timed out, or when the peer signalled that it cannot serve the data
// (in which case the request is scheduled for revert). It returns the
// verification error if the delivered range fails its proof against the
// current sync root. A verified range is handed to the scheduler through the
// accountResps channel unless the request is cancelled or goes stale first.
func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
	// Gather the payload size for the trace log
	size := common.StorageSize(len(hashes) * common.HashLength)
	for _, account := range accounts {
		size += common.StorageSize(len(account))
	}
	for _, node := range proof {
		size += common.StorageSize(len(node))
	}
	logger := peer.Log().New("reqid", id)
	logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)

	// Whether or not the response is valid, we can mark the peer as idle and
	// notify the scheduler to assign a new task. If the response is invalid,
	// we'll drop the peer in a bit.
	s.lock.Lock()
	if _, ok := s.peers[peer.ID()]; ok {
		s.accountIdlers[peer.ID()] = struct{}{}
	}
	// Non-blocking notification: skipped if the update channel cannot take it
	select {
	case s.update <- struct{}{}:
	default:
	}
	// Ensure the response is for a valid request
	req, ok := s.accountReqs[id]
	if !ok {
		// Request stale, perhaps the peer timed out but came through in the end
		logger.Warn("Unexpected account range packet")
		s.lock.Unlock()
		return nil
	}
	delete(s.accountReqs, id)

	// Clean up the request timeout timer, we'll see how to proceed further based
	// on the actual delivered content
	if !req.timeout.Stop() {
		// The timeout is already triggered, and this request will be reverted+rescheduled
		s.lock.Unlock()
		return nil
	}

	// Response is valid, but check if peer is signalling that it does not have
	// the requested data. For account range queries that means the state being
	// retrieved was either already pruned remotely, or the peer is not yet
	// synced to our head.
	if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
		logger.Debug("Peer rejected account range request", "root", s.root)
		s.statelessPeers[peer.ID()] = struct{}{}
		s.lock.Unlock()

		// Signal this request as failed, and ready for rescheduling
		s.scheduleRevertAccountRequest(req)
		return nil
	}
	// Snapshot the root under the lock, the verification below runs unlocked
	root := s.root
	s.lock.Unlock()

	// Reconstruct a partial trie from the response and verify it
	keys := make([][]byte, len(hashes))
	for i, key := range hashes {
		keys[i] = common.CopyBytes(key[:])
	}
	nodes := make(light.NodeList, len(proof))
	for i, node := range proof {
		nodes[i] = node
	}
	proofdb := nodes.NodeSet()

	var end []byte
	if len(keys) > 0 {
		end = keys[len(keys)-1]
	}
	db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
	if err != nil {
		logger.Warn("Account range failed proof", "err", err)
		// Signal this request as failed, and ready for rescheduling
		s.scheduleRevertAccountRequest(req)
		return err
	}
	// Partial trie reconstructed, send it to the scheduler for storage filling.
	// The keys the notary saw accessed during verification become the bounds set.
	bounds := make(map[common.Hash]struct{})

	it := notary.Accessed().NewIterator(nil, nil)
	for it.Next() {
		bounds[common.BytesToHash(it.Key())] = struct{}{}
	}
	it.Release()

	accs := make([]*state.Account, len(accounts))
	for i, account := range accounts {
		acc := new(state.Account)
		if err := rlp.DecodeBytes(account, acc); err != nil {
			panic(err) // We created these blobs, we must be able to decode them
		}
		accs[i] = acc
	}
	response := &accountResponse{
		task:     req.task,
		hashes:   hashes,
		accounts: accs,
		nodes:    db,
		trie:     tr,
		bounds:   bounds,
		overflow: light.NewNodeSet(),
		cont:     cont,
	}
	// Deliver the response, unless the request is cancelled or goes stale first
	select {
	case s.accountResps <- response:
	case <-req.cancel:
	case <-req.stale:
	}
	return nil
}

// OnByteCodes is a callback method to invoke when a batch of contract
// bytecodes are received from a remote peer.
//
// While the snapped flag is unset the delivery is routed to the sync phase
// handler (onByteCodes), afterwards to the heal phase handler (onHealByteCodes).
func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
	s.lock.RLock()
	syncing := !s.snapped
	s.lock.RUnlock()

	if syncing {
		return s.onByteCodes(peer, id, bytecodes)
	}
	return s.onHealByteCodes(peer, id, bytecodes)
}

// onByteCodes is a callback method to invoke when a batch of contract
// bytecodes are received from a remote peer in the syncing phase.
//
// It returns nil when the packet matches no tracked request, when the request
// already timed out, or when the peer delivered nothing (in which case the
// request is scheduled for revert). It returns an error if a delivered blob
// cannot be matched, in order, against the remaining requested hashes.
func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
	// Gather the payload size for the trace log
	var size common.StorageSize
	for _, code := range bytecodes {
		size += common.StorageSize(len(code))
	}
	logger := peer.Log().New("reqid", id)
	logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)

	// Whether or not the response is valid, we can mark the peer as idle and
	// notify the scheduler to assign a new task. If the response is invalid,
	// we'll drop the peer in a bit.
	s.lock.Lock()
	if _, ok := s.peers[peer.ID()]; ok {
		s.bytecodeIdlers[peer.ID()] = struct{}{}
	}
	// Non-blocking notification: skipped if the update channel cannot take it
	select {
	case s.update <- struct{}{}:
	default:
	}
	// Ensure the response is for a valid request
	req, ok := s.bytecodeReqs[id]
	if !ok {
		// Request stale, perhaps the peer timed out but came through in the end
		logger.Warn("Unexpected bytecode packet")
		s.lock.Unlock()
		return nil
	}
	delete(s.bytecodeReqs, id)

	// Clean up the request timeout timer, we'll see how to proceed further based
	// on the actual delivered content
	if !req.timeout.Stop() {
		// The timeout is already triggered, and this request will be reverted+rescheduled
		s.lock.Unlock()
		return nil
	}

	// Response is valid, but check if peer is signalling that it does not have
	// the requested data. For bytecode range queries that means the peer is not
	// yet synced.
	if len(bytecodes) == 0 {
		logger.Debug("Peer rejected bytecode request")
		s.statelessPeers[peer.ID()] = struct{}{}
		s.lock.Unlock()

		// Signal this request as failed, and ready for rescheduling
		s.scheduleRevertBytecodeRequest(req)
		return nil
	}
	s.lock.Unlock()

	// Cross reference the requested bytecodes with the response to find gaps
	// that the serving node is missing
	hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
	hash := make([]byte, 32)

	codes := make([][]byte, len(req.hashes))
	// i walks the delivered blobs, j walks the requested hashes; j only ever
	// moves forward, so deliveries must follow the request order
	for i, j := 0, 0; i < len(bytecodes); i++ {
		// Find the next hash that we've been served, leaving misses with nils
		hasher.Reset()
		hasher.Write(bytecodes[i])
		hasher.Read(hash)

		for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
			j++
		}
		if j < len(req.hashes) {
			codes[j] = bytecodes[i]
			j++
			continue
		}
		// We've either ran out of hashes, or got unrequested data
		logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
		// Signal this request as failed, and ready for rescheduling
		s.scheduleRevertBytecodeRequest(req)
		return errors.New("unexpected bytecode")
	}
	// Response validated, send it to the scheduler for filling
	response := &bytecodeResponse{
		task:   req.task,
		hashes: req.hashes,
		codes:  codes,
	}
	// Deliver the response, unless the request is cancelled or goes stale first
	select {
	case s.bytecodeResps <- response:
	case <-req.cancel:
	case <-req.stale:
	}
	return nil
}

// OnStorage is a callback method to invoke when ranges of storage slots
// are received from a remote peer.
//
// It returns nil when the packet matches no tracked request, when the request
// already timed out, or when the peer delivered nothing (in which case the
// request is scheduled for revert). It returns an error if the hash and slot
// sets differ in length, if more accounts are delivered than were requested,
// or if any delivered range fails verification against its requested root.
func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error {
	// Gather some trace stats to aid in debugging issues
	var (
		hashCount int
		slotCount int
		size      common.StorageSize
	)
	for _, hashset := range hashes {
		size += common.StorageSize(common.HashLength * len(hashset))
		hashCount += len(hashset)
	}
	for _, slotset := range slots {
		for _, slot := range slotset {
			size += common.StorageSize(len(slot))
		}
		slotCount += len(slotset)
	}
	for _, node := range proof {
		size += common.StorageSize(len(node))
	}
	logger := peer.Log().New("reqid", id)
	logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size)

	// Whether or not the response is valid, we can mark the peer as idle and
	// notify the scheduler to assign a new task. If the response is invalid,
	// we'll drop the peer in a bit.
	s.lock.Lock()
	if _, ok := s.peers[peer.ID()]; ok {
		s.storageIdlers[peer.ID()] = struct{}{}
	}
	// Non-blocking notification: skipped if the update channel cannot take it
	select {
	case s.update <- struct{}{}:
	default:
	}
	// Ensure the response is for a valid request
	req, ok := s.storageReqs[id]
	if !ok {
		// Request stale, perhaps the peer timed out but came through in the end
		logger.Warn("Unexpected storage ranges packet")
		s.lock.Unlock()
		return nil
	}
	delete(s.storageReqs, id)

	// Clean up the request timeout timer, we'll see how to proceed further based
	// on the actual delivered content
	if !req.timeout.Stop() {
		// The timeout is already triggered, and this request will be reverted+rescheduled
		s.lock.Unlock()
		return nil
	}

	// Reject the response if the hash sets and slot sets don't match, or if the
	// peer sent more data than requested.
	if len(hashes) != len(slots) {
		s.lock.Unlock()
		s.scheduleRevertStorageRequest(req) // reschedule request
		logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots))
		return errors.New("hash and slot set size mismatch")
	}
	if len(hashes) > len(req.accounts) {
		s.lock.Unlock()
		s.scheduleRevertStorageRequest(req) // reschedule request
		logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts))
		return errors.New("hash set larger than requested")
	}
	// Response is valid, but check if peer is signalling that it does not have
	// the requested data. For storage range queries that means the state being
	// retrieved was either already pruned remotely, or the peer is not yet
	// synced to our head.
	if len(hashes) == 0 {
		logger.Debug("Peer rejected storage request")
		s.statelessPeers[peer.ID()] = struct{}{}
		s.lock.Unlock()
		s.scheduleRevertStorageRequest(req) // reschedule request
		return nil
	}
	s.lock.Unlock()

	// Reconstruct the partial tries from the response and verify them
	var (
		dbs    = make([]ethdb.KeyValueStore, len(hashes))
		tries  = make([]*trie.Trie, len(hashes))
		notary *trie.KeyValueNotary
		cont   bool
	)
	for i := 0; i < len(hashes); i++ {
		// Convert the keys and proofs into an internal format
		keys := make([][]byte, len(hashes[i]))
		for j, key := range hashes[i] {
			keys[j] = common.CopyBytes(key[:])
		}
		// The proof nodes are only applied to the last delivered range, all
		// earlier ranges are verified as complete tries
		nodes := make(light.NodeList, 0, len(proof))
		if i == len(hashes)-1 {
			for _, node := range proof {
				nodes = append(nodes, node)
			}
		}
		var err error
		if len(nodes) == 0 {
			// No proof has been attached, the response must cover the entire key
			// space and hash to the origin root.
			dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil)
			if err != nil {
				s.scheduleRevertStorageRequest(req) // reschedule request
				logger.Warn("Storage slots failed proof", "err", err)
				return err
			}
		} else {
			// A proof was attached, the response is only partial, check that the
			// returned data is indeed part of the storage trie
			proofdb := nodes.NodeSet()

			var end []byte
			if len(keys) > 0 {
				end = keys[len(keys)-1]
			}
			dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb)
			if err != nil {
				s.scheduleRevertStorageRequest(req) // reschedule request
				logger.Warn("Storage range failed proof", "err", err)
				return err
			}
		}
	}
	// Partial tries reconstructed, send them to the scheduler for storage filling
	bounds := make(map[common.Hash]struct{})

	if notary != nil { // if all contract storages are delivered in full, no notary will be created
		it := notary.Accessed().NewIterator(nil, nil)
		for it.Next() {
			bounds[common.BytesToHash(it.Key())] = struct{}{}
		}
		it.Release()
	}
	response := &storageResponse{
		mainTask: req.mainTask,
		subTask:  req.subTask,
		accounts: req.accounts,
		roots:    req.roots,
		hashes:   hashes,
		slots:    slots,
		nodes:    dbs,
		tries:    tries,
		bounds:   bounds,
		overflow: light.NewNodeSet(),
		cont:     cont,
	}
	// Deliver the response, unless the request is cancelled or goes stale first
	select {
	case s.storageResps <- response:
	case <-req.cancel:
	case <-req.stale:
	}
	return nil
}

// OnTrieNodes is a callback method to invoke when a batch of trie nodes
// are received from a remote peer.
2364 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2365 var size common.StorageSize 2366 for _, node := range trienodes { 2367 size += common.StorageSize(len(node)) 2368 } 2369 logger := peer.Log().New("reqid", id) 2370 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2371 2372 // Whether or not the response is valid, we can mark the peer as idle and 2373 // notify the scheduler to assign a new task. If the response is invalid, 2374 // we'll drop the peer in a bit. 2375 s.lock.Lock() 2376 if _, ok := s.peers[peer.ID()]; ok { 2377 s.trienodeHealIdlers[peer.ID()] = struct{}{} 2378 } 2379 select { 2380 case s.update <- struct{}{}: 2381 default: 2382 } 2383 // Ensure the response is for a valid request 2384 req, ok := s.trienodeHealReqs[id] 2385 if !ok { 2386 // Request stale, perhaps the peer timed out but came through in the end 2387 logger.Warn("Unexpected trienode heal packet") 2388 s.lock.Unlock() 2389 return nil 2390 } 2391 delete(s.trienodeHealReqs, id) 2392 2393 // Clean up the request timeout timer, we'll see how to proceed further based 2394 // on the actual delivered content 2395 if !req.timeout.Stop() { 2396 // The timeout is already triggered, and this request will be reverted+rescheduled 2397 s.lock.Unlock() 2398 return nil 2399 } 2400 2401 // Response is valid, but check if peer is signalling that it does not have 2402 // the requested data. For bytecode range queries that means the peer is not 2403 // yet synced. 
2404 if len(trienodes) == 0 { 2405 logger.Debug("Peer rejected trienode heal request") 2406 s.statelessPeers[peer.ID()] = struct{}{} 2407 s.lock.Unlock() 2408 2409 // Signal this request as failed, and ready for rescheduling 2410 s.scheduleRevertTrienodeHealRequest(req) 2411 return nil 2412 } 2413 s.lock.Unlock() 2414 2415 // Cross reference the requested trienodes with the response to find gaps 2416 // that the serving node is missing 2417 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2418 hash := make([]byte, 32) 2419 2420 nodes := make([][]byte, len(req.hashes)) 2421 for i, j := 0, 0; i < len(trienodes); i++ { 2422 // Find the next hash that we've been served, leaving misses with nils 2423 hasher.Reset() 2424 hasher.Write(trienodes[i]) 2425 hasher.Read(hash) 2426 2427 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2428 j++ 2429 } 2430 if j < len(req.hashes) { 2431 nodes[j] = trienodes[i] 2432 j++ 2433 continue 2434 } 2435 // We've either ran out of hashes, or got unrequested data 2436 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) 2437 // Signal this request as failed, and ready for rescheduling 2438 s.scheduleRevertTrienodeHealRequest(req) 2439 return errors.New("unexpected healing trienode") 2440 } 2441 // Response validated, send it to the scheduler for filling 2442 response := &trienodeHealResponse{ 2443 task: req.task, 2444 hashes: req.hashes, 2445 paths: req.paths, 2446 nodes: nodes, 2447 } 2448 select { 2449 case s.trienodeHealResps <- response: 2450 case <-req.cancel: 2451 case <-req.stale: 2452 } 2453 return nil 2454 } 2455 2456 // onHealByteCodes is a callback method to invoke when a batch of contract 2457 // bytes codes are received from a remote peer in the healing phase. 
2458 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2459 var size common.StorageSize 2460 for _, code := range bytecodes { 2461 size += common.StorageSize(len(code)) 2462 } 2463 logger := peer.Log().New("reqid", id) 2464 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2465 2466 // Whether or not the response is valid, we can mark the peer as idle and 2467 // notify the scheduler to assign a new task. If the response is invalid, 2468 // we'll drop the peer in a bit. 2469 s.lock.Lock() 2470 if _, ok := s.peers[peer.ID()]; ok { 2471 s.bytecodeHealIdlers[peer.ID()] = struct{}{} 2472 } 2473 select { 2474 case s.update <- struct{}{}: 2475 default: 2476 } 2477 // Ensure the response is for a valid request 2478 req, ok := s.bytecodeHealReqs[id] 2479 if !ok { 2480 // Request stale, perhaps the peer timed out but came through in the end 2481 logger.Warn("Unexpected bytecode heal packet") 2482 s.lock.Unlock() 2483 return nil 2484 } 2485 delete(s.bytecodeHealReqs, id) 2486 2487 // Clean up the request timeout timer, we'll see how to proceed further based 2488 // on the actual delivered content 2489 if !req.timeout.Stop() { 2490 // The timeout is already triggered, and this request will be reverted+rescheduled 2491 s.lock.Unlock() 2492 return nil 2493 } 2494 2495 // Response is valid, but check if peer is signalling that it does not have 2496 // the requested data. For bytecode range queries that means the peer is not 2497 // yet synced. 
2498 if len(bytecodes) == 0 { 2499 logger.Debug("Peer rejected bytecode heal request") 2500 s.statelessPeers[peer.ID()] = struct{}{} 2501 s.lock.Unlock() 2502 2503 // Signal this request as failed, and ready for rescheduling 2504 s.scheduleRevertBytecodeHealRequest(req) 2505 return nil 2506 } 2507 s.lock.Unlock() 2508 2509 // Cross reference the requested bytecodes with the response to find gaps 2510 // that the serving node is missing 2511 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2512 hash := make([]byte, 32) 2513 2514 codes := make([][]byte, len(req.hashes)) 2515 for i, j := 0, 0; i < len(bytecodes); i++ { 2516 // Find the next hash that we've been served, leaving misses with nils 2517 hasher.Reset() 2518 hasher.Write(bytecodes[i]) 2519 hasher.Read(hash) 2520 2521 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2522 j++ 2523 } 2524 if j < len(req.hashes) { 2525 codes[j] = bytecodes[i] 2526 j++ 2527 continue 2528 } 2529 // We've either ran out of hashes, or got unrequested data 2530 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) 2531 // Signal this request as failed, and ready for rescheduling 2532 s.scheduleRevertBytecodeHealRequest(req) 2533 return errors.New("unexpected healing bytecode") 2534 } 2535 // Response validated, send it to the scheduler for filling 2536 response := &bytecodeHealResponse{ 2537 task: req.task, 2538 hashes: req.hashes, 2539 codes: codes, 2540 } 2541 select { 2542 case s.bytecodeHealResps <- response: 2543 case <-req.cancel: 2544 case <-req.stale: 2545 } 2546 return nil 2547 } 2548 2549 // hashSpace is the total size of the 256 bit hash space for accounts. 2550 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) 2551 2552 // report calculates various status reports and provides it to the user. 
2553 func (s *Syncer) report(force bool) { 2554 if len(s.tasks) > 0 { 2555 s.reportSyncProgress(force) 2556 return 2557 } 2558 s.reportHealProgress(force) 2559 } 2560 2561 // reportSyncProgress calculates various status reports and provides it to the user. 2562 func (s *Syncer) reportSyncProgress(force bool) { 2563 // Don't report all the events, just occasionally 2564 if !force && time.Since(s.logTime) < 3*time.Second { 2565 return 2566 } 2567 // Don't report anything until we have a meaningful progress 2568 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes 2569 if synced == 0 { 2570 return 2571 } 2572 accountGaps := new(big.Int) 2573 for _, task := range s.tasks { 2574 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big())) 2575 } 2576 accountFills := new(big.Int).Sub(hashSpace, accountGaps) 2577 if accountFills.BitLen() == 0 { 2578 return 2579 } 2580 s.logTime = time.Now() 2581 estBytes := float64(new(big.Int).Div( 2582 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), 2583 accountFills, 2584 ).Uint64()) 2585 2586 elapsed := time.Since(s.startTime) 2587 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes) 2588 2589 // Create a mega progress report 2590 var ( 2591 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes) 2592 accounts = fmt.Sprintf("%d@%v", s.accountSynced, s.accountBytes.TerminalString()) 2593 storage = fmt.Sprintf("%d@%v", s.storageSynced, s.storageBytes.TerminalString()) 2594 bytecode = fmt.Sprintf("%d@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString()) 2595 ) 2596 log.Info("State sync in progress", "synced", progress, "state", synced, 2597 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) 2598 } 2599 2600 // reportHealProgress calculates various status reports and provides it to the user. 
2601 func (s *Syncer) reportHealProgress(force bool) { 2602 // Don't report all the events, just occasionally 2603 if !force && time.Since(s.logTime) < 3*time.Second { 2604 return 2605 } 2606 s.logTime = time.Now() 2607 2608 // Create a mega progress report 2609 var ( 2610 trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString()) 2611 bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) 2612 ) 2613 log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, 2614 "pending", s.healer.scheduler.Pending()) 2615 }