github.com/cryptogateway/go-paymex@v0.0.0-20210204174735-96277fb1e602/eth/protocols/snap/sync.go 1 // Copyright 2020 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package snap 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "errors" 23 "fmt" 24 "math/big" 25 "math/rand" 26 "sync" 27 "time" 28 29 "github.com/cryptogateway/go-paymex/common" 30 "github.com/cryptogateway/go-paymex/core/rawdb" 31 "github.com/cryptogateway/go-paymex/core/state" 32 "github.com/cryptogateway/go-paymex/crypto" 33 "github.com/cryptogateway/go-paymex/ethdb" 34 "github.com/cryptogateway/go-paymex/event" 35 "github.com/cryptogateway/go-paymex/light" 36 "github.com/cryptogateway/go-paymex/log" 37 "github.com/cryptogateway/go-paymex/rlp" 38 "github.com/cryptogateway/go-paymex/trie" 39 "golang.org/x/crypto/sha3" 40 ) 41 42 var ( 43 // emptyRoot is the known root hash of an empty trie. 44 emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") 45 46 // emptyCode is the known hash of the empty EVM bytecode. 47 emptyCode = crypto.Keccak256Hash(nil) 48 ) 49 50 const ( 51 // maxRequestSize is the maximum number of bytes to request from a remote peer. 52 maxRequestSize = 512 * 1024 53 54 // maxStorageSetRequestCount is the maximum number of contracts to request the 55 // storage of in a single query. If this number is too low, we're not filling 56 // responses fully and waste round trip times. If it's too high, we're capping 57 // responses and waste bandwidth. 58 maxStorageSetRequestCount = maxRequestSize / 1024 59 60 // maxCodeRequestCount is the maximum number of bytecode blobs to request in a 61 // single query. If this number is too low, we're not filling responses fully 62 // and waste round trip times. If it's too high, we're capping responses and 63 // waste bandwidth. 64 // 65 // Deployed bytecodes are currently capped at 24KB, so the minimum request 66 // size should be maxRequestSize / 24K. Assuming that most contracts do not 67 // come close to that, requesting 4x should be a good approximation. 68 maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4 69 70 // maxTrieRequestCount is the maximum number of trie node blobs to request in 71 // a single query. If this number is too low, we're not filling responses fully 72 // and waste round trip times. If it's too high, we're capping responses and 73 // waste bandwidth. 74 maxTrieRequestCount = 512 75 76 // accountConcurrency is the number of chunks to split the account trie into 77 // to allow concurrent retrievals. 78 accountConcurrency = 16 79 80 // storageConcurrency is the number of chunks to split a large contract 81 // storage trie into to allow concurrent retrievals.
82 storageConcurrency = 16 83 ) 84 85 var ( 86 // requestTimeout is the maximum time a peer is allowed to spend on serving 87 // a single network request. 88 requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync? 89 ) 90 91 // accountRequest tracks a pending account range request to ensure responses are 92 // to actual requests and to validate any security constraints. 93 // 94 // Concurrency note: account requests and responses are handled concurrently from 95 // the main runloop to allow Merkle proof verifications on the peer's thread and 96 // to drop on invalid response. The request struct must contain all the data to 97 // construct the response without accessing runloop internals (i.e. task). That 98 // is only included to allow the runloop to match a response to the task being 99 // synced without having yet another set of maps. 100 type accountRequest struct { 101 peer string // Peer to which this request is assigned 102 id uint64 // Request ID of this request 103 104 cancel chan struct{} // Channel to track sync cancellation 105 timeout *time.Timer // Timer to track delivery timeout 106 stale chan struct{} // Channel to signal the request was dropped 107 108 origin common.Hash // First account requested to allow continuation checks 109 limit common.Hash // Last account requested to allow non-overlapping chunking 110 111 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 112 } 113 114 // accountResponse is an already Merkle-verified remote response to an account 115 // range request. It contains the subtrie for the requested account range and 116 // the database that's going to be filled with the internal nodes on commit. 117 type accountResponse struct { 118 task *accountTask // Task which this request is filling 119 120 hashes []common.Hash // Account hashes in the returned range 121 accounts []*state.Account // Expanded accounts in the returned range 122 123 nodes ethdb.KeyValueStore // Database containing the reconstructed trie nodes 124 trie *trie.Trie // Reconstructed trie to reject incomplete account paths 125 126 bounds map[common.Hash]struct{} // Boundary nodes to avoid persisting incomplete accounts 127 overflow *light.NodeSet // Overflow nodes to avoid persisting across chunk boundaries 128 129 cont bool // Whether the account range has a continuation 130 } 131 132 // bytecodeRequest tracks a pending bytecode request to ensure responses are to 133 // actual requests and to validate any security constraints. 134 // 135 // Concurrency note: bytecode requests and responses are handled concurrently from 136 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 137 // to drop on invalid response. The request struct must contain all the data to 138 // construct the response without accessing runloop internals (i.e. task). That 139 // is only included to allow the runloop to match a response to the task being 140 // synced without having yet another set of maps. 141 type bytecodeRequest struct { 142 peer string // Peer to which this request is assigned 143 id uint64 // Request ID of this request 144 145 cancel chan struct{} // Channel to track sync cancellation 146 timeout *time.Timer // Timer to track delivery timeout 147 stale chan struct{} // Channel to signal the request was dropped 148 149 hashes []common.Hash // Bytecode hashes to validate responses 150 task *accountTask // Task which this request is filling (only access fields through the runloop!!) 
151 } 152 153 // bytecodeResponse is an already verified remote response to a bytecode request. 154 type bytecodeResponse struct { 155 task *accountTask // Task which this request is filling 156 157 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 158 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 159 } 160 161 // storageRequest tracks a pending storage ranges request to ensure responses are 162 // to actual requests and to validate any security constraints. 163 // 164 // Concurrency note: storage requests and responses are handled concurrently from 165 // the main runloop to allow Merkle proof verifications on the peer's thread and 166 // to drop on invalid response. The request struct must contain all the data to 167 // construct the response without accessing runloop internals (i.e. tasks). That 168 // is only included to allow the runloop to match a response to the task being 169 // synced without having yet another set of maps. 170 type storageRequest struct { 171 peer string // Peer to which this request is assigned 172 id uint64 // Request ID of this request 173 174 cancel chan struct{} // Channel to track sync cancellation 175 timeout *time.Timer // Timer to track delivery timeout 176 stale chan struct{} // Channel to signal the request was dropped 177 178 accounts []common.Hash // Account hashes to validate responses 179 roots []common.Hash // Storage roots to validate responses 180 181 origin common.Hash // First storage slot requested to allow continuation checks 182 limit common.Hash // Last storage slot requested to allow non-overlapping chunking 183 184 mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!) 185 subTask *storageTask // Task which this response is filling (only access fields through the runloop!!) 186 } 187 188 // storageResponse is an already Merkle-verified remote response to a storage 189 // range request. It contains the subtries for the requested storage ranges and 190 // the databases that are going to be filled with the internal nodes on commit. 191 type storageResponse struct { 192 mainTask *accountTask // Task which this response belongs to 193 subTask *storageTask // Task which this response is filling 194 195 accounts []common.Hash // Account hashes requested, may be only partially filled 196 roots []common.Hash // Storage roots requested, may be only partially filled 197 198 hashes [][]common.Hash // Storage slot hashes in the returned range 199 slots [][][]byte // Storage slot values in the returned range 200 nodes []ethdb.KeyValueStore // Database containing the reconstructed trie nodes 201 tries []*trie.Trie // Reconstructed tries to reject overflown slots 202 203 // Fields relevant for the last account only 204 bounds map[common.Hash]struct{} // Boundary nodes to avoid persisting (incomplete) 205 overflow *light.NodeSet // Overflow nodes to avoid persisting across chunk boundaries 206 cont bool // Whether the last storage range has a continuation 207 } 208
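// The parallel slices in storageResponse line up index by index: accounts[i] and
// roots[i] describe the i-th requested contract, while hashes[i][j] and slots[i][j]
// are the j-th slot returned for it; hashes/slots may be shorter than accounts if
// the response was cut short. A minimal illustrative sketch (editor's addition,
// not part of the original file; the function name is hypothetical):
func walkStorageResponse(res *storageResponse) {
	for i := range res.hashes { // never longer than res.accounts
		account := res.accounts[i]
		for j := range res.hashes[i] {
			_ = res.hashes[i][j] // hash of the storage slot key
			_ = res.slots[i][j]  // slot value bytes as delivered by the peer
		}
		log.Trace("Walked storage chunk", "account", account, "slots", len(res.hashes[i]))
	}
}
209 // trienodeHealRequest tracks a pending state trie request to ensure responses 210 // are to actual requests and to validate any security constraints. 211 // 212 // Concurrency note: trie node requests and responses are handled concurrently from 213 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 214 // to drop on invalid response. The request struct must contain all the data to 215 // construct the response without accessing runloop internals (i.e. task).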
That 216 // is only included to allow the runloop to match a response to the task being 217 // synced without having yet another set of maps. 218 type trienodeHealRequest struct { 219 peer string // Peer to which this request is assigned 220 id uint64 // Request ID of this request 221 222 cancel chan struct{} // Channel to track sync cancellation 223 timeout *time.Timer // Timer to track delivery timeout 224 stale chan struct{} // Channel to signal the request was dropped 225 226 hashes []common.Hash // Trie node hashes to validate responses 227 paths []trie.SyncPath // Trie node paths requested for rescheduling 228 229 task *healTask // Task which this request is filling (only access fields through the runloop!!) 230 } 231 232 // trienodeHealResponse is an already verified remote response to a trie node request. 233 type trienodeHealResponse struct { 234 task *healTask // Task which this request is filling 235 236 hashes []common.Hash // Hashes of the trie nodes to avoid double hashing 237 paths []trie.SyncPath // Trie node paths requested for rescheduling missing ones 238 nodes [][]byte // Actual trie nodes to store into the database (nil = missing) 239 } 240 241 // bytecodeHealRequest tracks a pending bytecode request to ensure responses are to 242 // actual requests and to validate any security constraints. 243 // 244 // Concurrency note: bytecode requests and responses are handled concurrently from 245 // the main runloop to allow Keccak256 hash verifications on the peer's thread and 246 // to drop on invalid response. The request struct must contain all the data to 247 // construct the response without accessing runloop internals (i.e. task). That 248 // is only included to allow the runloop to match a response to the task being 249 // synced without having yet another set of maps. 250 type bytecodeHealRequest struct { 251 peer string // Peer to which this request is assigned 252 id uint64 // Request ID of this request 253 254 cancel chan struct{} // Channel to track sync cancellation 255 timeout *time.Timer // Timer to track delivery timeout 256 stale chan struct{} // Channel to signal the request was dropped 257 258 hashes []common.Hash // Bytecode hashes to validate responses 259 task *healTask // Task which this request is filling (only access fields through the runloop!!) 260 } 261 262 // bytecodeHealResponse is an already verified remote response to a bytecode request. 263 type bytecodeHealResponse struct { 264 task *healTask // Task which this request is filling 265 266 hashes []common.Hash // Hashes of the bytecode to avoid double hashing 267 codes [][]byte // Actual bytecodes to store into the database (nil = missing) 268 } 269 270 // accountTask represents the sync task for a chunk of the account snapshot. 
271 type accountTask struct { 272 // These fields get serialized to leveldb on shutdown 273 Next common.Hash // Next account to sync in this interval 274 Last common.Hash // Last account to sync in this interval 275 SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts 276 277 // These fields are internals used during runtime 278 req *accountRequest // Pending request to fill this task 279 res *accountResponse // Validated response filling this task 280 pend int // Number of pending subtasks for this round 281 282 needCode []bool // Flags whether the filling accounts need code retrieval 283 needState []bool // Flags whether the filling accounts need storage retrieval 284 needHeal []bool // Flags whether the filling accounts' state was chunked and needs healing 285 286 codeTasks map[common.Hash]struct{} // Code hashes that need retrieval 287 stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval 288 289 done bool // Flag whether the task can be removed 290 } 291 292 // storageTask represents the sync task for a chunk of the storage snapshot. 293 type storageTask struct { 294 Next common.Hash // Next storage slot to sync in this interval 295 Last common.Hash // Last storage slot to sync in this interval 296 297 // These fields are internals used during runtime 298 root common.Hash // Storage root hash for this instance 299 req *storageRequest // Pending request to fill this task 300 done bool // Flag whether the task can be removed 301 } 302 303 // healTask represents the sync task for healing the snap-synced chunk boundaries. 304 type healTask struct { 305 scheduler *trie.Sync // State trie sync scheduler defining the tasks 306 307 trieTasks map[common.Hash]trie.SyncPath // Set of trie node tasks currently queued for retrieval 308 codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval 309 } 310
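// Only the exported fields of accountTask and storageTask above survive a shutdown:
// the suspended tasks are persisted as JSON (see syncProgress and saveSyncStatus
// below), and encoding/json silently drops the unexported runtime state, so a
// resumed task always comes back idle. A minimal illustrative sketch (editor's
// addition, not part of the original file; the function name and hash value are
// arbitrary):
func exampleTaskRoundTrip() {
	task := &accountTask{
		Next:     common.Hash{},
		Last:     common.HexToHash("0x0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
		SubTasks: make(map[common.Hash][]*storageTask),
	}
	blob, err := json.Marshal(task)
	if err != nil {
		log.Error("Failed to marshal task", "err", err)
		return
	}
	var restored accountTask
	if err := json.Unmarshal(blob, &restored); err != nil {
		log.Error("Failed to unmarshal task", "err", err)
		return
	}
	// restored.Next, restored.Last and restored.SubTasks match the original, while
	// the unexported req/res/pend fields are back to their zero values.
}
311 // syncProgress is a database entry to allow suspending and resuming a snapshot state 312 // sync. As opposed to full and fast sync, there is no way to restart a suspended 313 // snap sync without prior knowledge of the suspension point.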
314 type syncProgress struct { 315 Tasks []*accountTask // The suspended account tasks (contract tasks within) 316 317 // Status report during syncing phase 318 AccountSynced uint64 // Number of accounts downloaded 319 AccountBytes common.StorageSize // Number of account trie bytes persisted to disk 320 BytecodeSynced uint64 // Number of bytecodes downloaded 321 BytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 322 StorageSynced uint64 // Number of storage slots downloaded 323 StorageBytes common.StorageSize // Number of storage trie bytes persisted to disk 324 325 // Status report during healing phase 326 TrienodeHealSynced uint64 // Number of state trie nodes downloaded 327 TrienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 328 TrienodeHealDups uint64 // Number of state trie nodes already processed 329 TrienodeHealNops uint64 // Number of state trie nodes not requested 330 BytecodeHealSynced uint64 // Number of bytecodes downloaded 331 BytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 332 BytecodeHealDups uint64 // Number of bytecodes already processed 333 BytecodeHealNops uint64 // Number of bytecodes not requested 334 } 335 336 // SyncPeer abstracts out the methods required for a peer to be synced against 337 // with the goal of allowing the construction of mock peers without the full 338 // blown networking. 339 type SyncPeer interface { 340 // ID retrieves the peer's unique identifier. 341 ID() string 342 343 // RequestAccountRange fetches a batch of accounts rooted in a specific account 344 // trie, starting with the origin. 345 RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error 346 347 // RequestStorageRanges fetches a batch of storage slots belonging to one or 348 // more accounts. If slots from only one account are requested, an origin marker 349 // may also be used to retrieve from there. 350 RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error 351 352 // RequestByteCodes fetches a batch of bytecodes by hash. 353 RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error 354 355 // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in 356 // a specific state trie. 357 RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error 358 359 // Log retrieves the peer's own contextual logger. 360 Log() log.Logger 361 } 362
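// The SyncPeer interface above exists so tests can drive the syncer with mock
// peers instead of real networking. Below is a minimal no-op mock (editor's
// addition, not part of the original file; the type name is hypothetical): every
// request method reports success but never delivers data, so a real test double
// would additionally feed canned responses back into the Syncer after wiring it
// up via NewSyncer and Register.
type nopSyncPeer struct {
	id string
}

func (p *nopSyncPeer) ID() string { return p.id }

func (p *nopSyncPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
	return nil // pretend the request was sent; nothing will ever be delivered
}

func (p *nopSyncPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
	return nil
}

func (p *nopSyncPeer) Log() log.Logger { return log.New("peer", p.id) }
363 // Syncer is an Ethereum account and storage trie syncer based on snapshots and 364 // the snap protocol. Its purpose is to download all the accounts and storage 365 // slots from remote peers and reassemble chunks of the state trie, on top of 366 // which a state sync can be run to fix any gaps / overlaps.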
367 // 368 // Every network request has a variety of failure events: 369 // - The peer disconnects after task assignment, failing to send the request 370 // - The peer disconnects after sending the request, before delivering on it 371 // - The peer remains connected, but does not deliver a response in time 372 // - The peer delivers a stale response after a previous timeout 373 // - The peer delivers a refusal to serve the requested state 374 type Syncer struct { 375 db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) 376 bloom *trie.SyncBloom // Bloom filter to deduplicate nodes for state fixup 377 378 root common.Hash // Current state trie root being synced 379 tasks []*accountTask // Current account task set being synced 380 snapped bool // Flag to signal that snap phase is done 381 healer *healTask // Current state healing task being executed 382 update chan struct{} // Notification channel for possible sync progression 383 384 peers map[string]SyncPeer // Currently active peers to download from 385 peerJoin *event.Feed // Event feed to react to peers joining 386 peerDrop *event.Feed // Event feed to react to peers dropping 387 388 // Request tracking during syncing phase 389 statelessPeers map[string]struct{} // Peers that failed to deliver state data 390 accountIdlers map[string]struct{} // Peers that aren't serving account requests 391 bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests 392 storageIdlers map[string]struct{} // Peers that aren't serving storage requests 393 394 accountReqs map[uint64]*accountRequest // Account requests currently running 395 bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running 396 storageReqs map[uint64]*storageRequest // Storage requests currently running 397 398 accountReqFails chan *accountRequest // Failed account range requests to revert 399 bytecodeReqFails chan *bytecodeRequest // Failed bytecode requests to revert 400 storageReqFails chan *storageRequest // Failed storage requests to revert 401 402 accountResps chan *accountResponse // Account sub-tries to integrate into the database 403 bytecodeResps chan *bytecodeResponse // Bytecodes to integrate into the database 404 storageResps chan *storageResponse // Storage sub-tries to integrate into the database 405 406 accountSynced uint64 // Number of accounts downloaded 407 accountBytes common.StorageSize // Number of account trie bytes persisted to disk 408 bytecodeSynced uint64 // Number of bytecodes downloaded 409 bytecodeBytes common.StorageSize // Number of bytecode bytes downloaded 410 storageSynced uint64 // Number of storage slots downloaded 411 storageBytes common.StorageSize // Number of storage trie bytes persisted to disk 412 413 // Request tracking during healing phase 414 trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests 415 bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests 416 417 trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running 418 bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running 419 420 trienodeHealReqFails chan *trienodeHealRequest // Failed trienode requests to revert 421 bytecodeHealReqFails chan *bytecodeHealRequest // Failed bytecode requests to revert 422 423 trienodeHealResps chan *trienodeHealResponse // Trie nodes to integrate into the database 424 bytecodeHealResps chan *bytecodeHealResponse // Bytecodes to integrate into the database 425 
426 trienodeHealSynced uint64 // Number of state trie nodes downloaded 427 trienodeHealBytes common.StorageSize // Number of state trie bytes persisted to disk 428 trienodeHealDups uint64 // Number of state trie nodes already processed 429 trienodeHealNops uint64 // Number of state trie nodes not requested 430 bytecodeHealSynced uint64 // Number of bytecodes downloaded 431 bytecodeHealBytes common.StorageSize // Number of bytecodes persisted to disk 432 bytecodeHealDups uint64 // Number of bytecodes already processed 433 bytecodeHealNops uint64 // Number of bytecodes not requested 434 435 startTime time.Time // Time instance when snapshot sync started 436 startAcc common.Hash // Account hash where sync started from 437 logTime time.Time // Time instance when status was last reported 438 439 pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown 440 lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) 441 } 442 443 // NewSyncer creates a new snapshot syncer to download the Ethereum state over the 444 // snap protocol. 445 func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { 446 return &Syncer{ 447 db: db, 448 bloom: bloom, 449 450 peers: make(map[string]SyncPeer), 451 peerJoin: new(event.Feed), 452 peerDrop: new(event.Feed), 453 update: make(chan struct{}, 1), 454 455 accountIdlers: make(map[string]struct{}), 456 storageIdlers: make(map[string]struct{}), 457 bytecodeIdlers: make(map[string]struct{}), 458 459 accountReqs: make(map[uint64]*accountRequest), 460 storageReqs: make(map[uint64]*storageRequest), 461 bytecodeReqs: make(map[uint64]*bytecodeRequest), 462 accountReqFails: make(chan *accountRequest), 463 storageReqFails: make(chan *storageRequest), 464 bytecodeReqFails: make(chan *bytecodeRequest), 465 accountResps: make(chan *accountResponse), 466 storageResps: make(chan *storageResponse), 467 bytecodeResps: make(chan *bytecodeResponse), 468 469 trienodeHealIdlers: make(map[string]struct{}), 470 bytecodeHealIdlers: make(map[string]struct{}), 471 472 trienodeHealReqs: make(map[uint64]*trienodeHealRequest), 473 bytecodeHealReqs: make(map[uint64]*bytecodeHealRequest), 474 trienodeHealReqFails: make(chan *trienodeHealRequest), 475 bytecodeHealReqFails: make(chan *bytecodeHealRequest), 476 trienodeHealResps: make(chan *trienodeHealResponse), 477 bytecodeHealResps: make(chan *bytecodeHealResponse), 478 } 479 } 480 481 // Register injects a new data source into the syncer's peerset. 482 func (s *Syncer) Register(peer SyncPeer) error { 483 // Make sure the peer is not registered yet 484 id := peer.ID() 485 486 s.lock.Lock() 487 if _, ok := s.peers[id]; ok { 488 log.Error("Snap peer already registered", "id", id) 489 490 s.lock.Unlock() 491 return errors.New("already registered") 492 } 493 s.peers[id] = peer 494 495 // Mark the peer as idle, even if no sync is running 496 s.accountIdlers[id] = struct{}{} 497 s.storageIdlers[id] = struct{}{} 498 s.bytecodeIdlers[id] = struct{}{} 499 s.trienodeHealIdlers[id] = struct{}{} 500 s.bytecodeHealIdlers[id] = struct{}{} 501 s.lock.Unlock() 502 503 // Notify any active syncs that a new peer can be assigned data 504 s.peerJoin.Send(id) 505 return nil 506 } 507 508 // Unregister removes a data source from the syncer's peerset.
509 func (s *Syncer) Unregister(id string) error { 510 // Remove all traces of the peer from the registry 511 s.lock.Lock() 512 if _, ok := s.peers[id]; !ok { 513 log.Error("Snap peer not registered", "id", id) 514 515 s.lock.Unlock() 516 return errors.New("not registered") 517 } 518 delete(s.peers, id) 519 520 // Remove status markers, even if no sync is running 521 delete(s.statelessPeers, id) 522 523 delete(s.accountIdlers, id) 524 delete(s.storageIdlers, id) 525 delete(s.bytecodeIdlers, id) 526 delete(s.trienodeHealIdlers, id) 527 delete(s.bytecodeHealIdlers, id) 528 s.lock.Unlock() 529 530 // Notify any active syncs that pending requests need to be reverted 531 s.peerDrop.Send(id) 532 return nil 533 } 534 535 // Sync starts (or resumes a previous) sync cycle to iterate over an state trie 536 // with the given root and reconstruct the nodes based on the snapshot leaves. 537 // Previously downloaded segments will not be redownloaded of fixed, rather any 538 // errors will be healed after the leaves are fully accumulated. 539 func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { 540 // Move the trie root from any previous value, revert stateless markers for 541 // any peers and initialize the syncer if it was not yet run 542 s.lock.Lock() 543 s.root = root 544 s.healer = &healTask{ 545 scheduler: state.NewStateSync(root, s.db, s.bloom), 546 trieTasks: make(map[common.Hash]trie.SyncPath), 547 codeTasks: make(map[common.Hash]struct{}), 548 } 549 s.statelessPeers = make(map[string]struct{}) 550 s.lock.Unlock() 551 552 if s.startTime == (time.Time{}) { 553 s.startTime = time.Now() 554 } 555 // Retrieve the previous sync status from LevelDB and abort if already synced 556 s.loadSyncStatus() 557 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 558 log.Debug("Snapshot sync already completed") 559 return nil 560 } 561 defer func() { // Persist any progress, independent of failure 562 for _, task := range s.tasks { 563 s.forwardAccountTask(task) 564 } 565 s.cleanAccountTasks() 566 s.saveSyncStatus() 567 }() 568 569 log.Debug("Starting snapshot sync cycle", "root", root) 570 defer s.report(true) 571 572 // Whether sync completed or not, disregard any future packets 573 defer func() { 574 log.Debug("Terminating snapshot sync cycle", "root", root) 575 s.lock.Lock() 576 s.accountReqs = make(map[uint64]*accountRequest) 577 s.storageReqs = make(map[uint64]*storageRequest) 578 s.bytecodeReqs = make(map[uint64]*bytecodeRequest) 579 s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest) 580 s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest) 581 s.lock.Unlock() 582 }() 583 // Keep scheduling sync tasks 584 peerJoin := make(chan string, 16) 585 peerJoinSub := s.peerJoin.Subscribe(peerJoin) 586 defer peerJoinSub.Unsubscribe() 587 588 peerDrop := make(chan string, 16) 589 peerDropSub := s.peerDrop.Subscribe(peerDrop) 590 defer peerDropSub.Unsubscribe() 591 592 for { 593 // Remove all completed tasks and terminate sync if everything's done 594 s.cleanStorageTasks() 595 s.cleanAccountTasks() 596 if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 { 597 return nil 598 } 599 // Assign all the data retrieval tasks to any free peers 600 s.assignAccountTasks(cancel) 601 s.assignBytecodeTasks(cancel) 602 s.assignStorageTasks(cancel) 603 604 if len(s.tasks) == 0 { 605 // Sync phase done, run heal phase 606 s.assignTrienodeHealTasks(cancel) 607 s.assignBytecodeHealTasks(cancel) 608 } 609 // Wait for something to happen 610 select { 611 case <-s.update: 612 // Something 
happened (new peer, delivery, timeout), recheck tasks 613 case <-peerJoin: 614 // A new peer joined, try to schedule it new tasks 615 case id := <-peerDrop: 616 s.revertRequests(id) 617 case <-cancel: 618 return errCancelled 619 620 case req := <-s.accountReqFails: 621 s.revertAccountRequest(req) 622 case req := <-s.bytecodeReqFails: 623 s.revertBytecodeRequest(req) 624 case req := <-s.storageReqFails: 625 s.revertStorageRequest(req) 626 case req := <-s.trienodeHealReqFails: 627 s.revertTrienodeHealRequest(req) 628 case req := <-s.bytecodeHealReqFails: 629 s.revertBytecodeHealRequest(req) 630 631 case res := <-s.accountResps: 632 s.processAccountResponse(res) 633 case res := <-s.bytecodeResps: 634 s.processBytecodeResponse(res) 635 case res := <-s.storageResps: 636 s.processStorageResponse(res) 637 case res := <-s.trienodeHealResps: 638 s.processTrienodeHealResponse(res) 639 case res := <-s.bytecodeHealResps: 640 s.processBytecodeHealResponse(res) 641 } 642 // Report stats if something meaningful happened 643 s.report(false) 644 } 645 } 646 647 // loadSyncStatus retrieves a previously aborted sync status from the database, 648 // or generates a fresh one if none is available. 649 func (s *Syncer) loadSyncStatus() { 650 var progress syncProgress 651 652 if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil { 653 if err := json.Unmarshal(status, &progress); err != nil { 654 log.Error("Failed to decode snap sync status", "err", err) 655 } else { 656 for _, task := range progress.Tasks { 657 log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) 658 } 659 s.tasks = progress.Tasks 660 s.snapped = len(s.tasks) == 0 661 662 s.accountSynced = progress.AccountSynced 663 s.accountBytes = progress.AccountBytes 664 s.bytecodeSynced = progress.BytecodeSynced 665 s.bytecodeBytes = progress.BytecodeBytes 666 s.storageSynced = progress.StorageSynced 667 s.storageBytes = progress.StorageBytes 668 669 s.trienodeHealSynced = progress.TrienodeHealSynced 670 s.trienodeHealBytes = progress.TrienodeHealBytes 671 s.bytecodeHealSynced = progress.BytecodeHealSynced 672 s.bytecodeHealBytes = progress.BytecodeHealBytes 673 return 674 } 675 } 676 // Either we've failed to decode the previus state, or there was none. 677 // Start a fresh sync by chunking up the account range and scheduling 678 // them for retrieval. 679 s.tasks = nil 680 s.accountSynced, s.accountBytes = 0, 0 681 s.bytecodeSynced, s.bytecodeBytes = 0, 0 682 s.storageSynced, s.storageBytes = 0, 0 683 s.trienodeHealSynced, s.trienodeHealBytes = 0, 0 684 s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0 685 686 var next common.Hash 687 step := new(big.Int).Sub( 688 new(big.Int).Div( 689 new(big.Int).Exp(common.Big2, common.Big256, nil), 690 big.NewInt(accountConcurrency), 691 ), common.Big1, 692 ) 693 for i := 0; i < accountConcurrency; i++ { 694 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 695 if i == accountConcurrency-1 { 696 // Make sure we don't overflow if the step is not a proper divisor 697 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 698 } 699 s.tasks = append(s.tasks, &accountTask{ 700 Next: next, 701 Last: last, 702 SubTasks: make(map[common.Hash][]*storageTask), 703 }) 704 log.Debug("Created account sync task", "from", next, "last", last) 705 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 706 } 707 } 708 709 // saveSyncStatus marshals the remaining sync tasks into leveldb. 
710 func (s *Syncer) saveSyncStatus() { 711 progress := &syncProgress{ 712 Tasks: s.tasks, 713 AccountSynced: s.accountSynced, 714 AccountBytes: s.accountBytes, 715 BytecodeSynced: s.bytecodeSynced, 716 BytecodeBytes: s.bytecodeBytes, 717 StorageSynced: s.storageSynced, 718 StorageBytes: s.storageBytes, 719 TrienodeHealSynced: s.trienodeHealSynced, 720 TrienodeHealBytes: s.trienodeHealBytes, 721 BytecodeHealSynced: s.bytecodeHealSynced, 722 BytecodeHealBytes: s.bytecodeHealBytes, 723 } 724 status, err := json.Marshal(progress) 725 if err != nil { 726 panic(err) // This can only fail during implementation 727 } 728 rawdb.WriteSnapshotSyncStatus(s.db, status) 729 } 730 731 // cleanAccountTasks removes account range retrieval tasks that have already been 732 // completed. 733 func (s *Syncer) cleanAccountTasks() { 734 for i := 0; i < len(s.tasks); i++ { 735 if s.tasks[i].done { 736 s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 737 i-- 738 } 739 } 740 if len(s.tasks) == 0 { 741 s.lock.Lock() 742 s.snapped = true 743 s.lock.Unlock() 744 } 745 } 746 747 // cleanStorageTasks iterates over all the account tasks and storage sub-tasks 748 // within, cleaning any that have been completed. 749 func (s *Syncer) cleanStorageTasks() { 750 for _, task := range s.tasks { 751 for account, subtasks := range task.SubTasks { 752 // Remove storage range retrieval tasks that completed 753 for j := 0; j < len(subtasks); j++ { 754 if subtasks[j].done { 755 subtasks = append(subtasks[:j], subtasks[j+1:]...) 756 j-- 757 } 758 } 759 if len(subtasks) > 0 { 760 task.SubTasks[account] = subtasks 761 continue 762 } 763 // If all storage chunks are done, mark the account as done too 764 for j, hash := range task.res.hashes { 765 if hash == account { 766 task.needState[j] = false 767 } 768 } 769 delete(task.SubTasks, account) 770 task.pend-- 771 772 // If this was the last pending task, forward the account task 773 if task.pend == 0 { 774 s.forwardAccountTask(task) 775 } 776 } 777 } 778 } 779 780 // assignAccountTasks attempts to match idle peers to pending account range 781 // retrievals. 782 func (s *Syncer) assignAccountTasks(cancel chan struct{}) { 783 s.lock.Lock() 784 defer s.lock.Unlock() 785 786 // If there are no idle peers, short circuit assignment 787 if len(s.accountIdlers) == 0 { 788 return 789 } 790 // Iterate over all the tasks and try to find a pending one 791 for _, task := range s.tasks { 792 // Skip any tasks already filling 793 if task.req != nil || task.res != nil { 794 continue 795 } 796 // Task pending retrieval, try to find an idle peer. If no such peer 797 // exists, we probably assigned tasks for all (or they are stateless). 798 // Abort the entire assignment mechanism. 
799 var idle string 800 for id := range s.accountIdlers { 801 // If the peer rejected a query in this sync cycle, don't bother asking 802 // again for anything, it's either out of sync or already pruned 803 if _, ok := s.statelessPeers[id]; ok { 804 continue 805 } 806 idle = id 807 break 808 } 809 if idle == "" { 810 return 811 } 812 // Matched a pending task to an idle peer, allocate a unique request id 813 var reqid uint64 814 for { 815 reqid = uint64(rand.Int63()) 816 if reqid == 0 { 817 continue 818 } 819 if _, ok := s.accountReqs[reqid]; ok { 820 continue 821 } 822 break 823 } 824 // Generate the network query and send it to the peer 825 req := &accountRequest{ 826 peer: idle, 827 id: reqid, 828 cancel: cancel, 829 stale: make(chan struct{}), 830 origin: task.Next, 831 limit: task.Last, 832 task: task, 833 } 834 req.timeout = time.AfterFunc(requestTimeout, func() { 835 log.Debug("Account range request timed out") 836 s.scheduleRevertAccountRequest(req) 837 }) 838 s.accountReqs[reqid] = req 839 delete(s.accountIdlers, idle) 840 841 s.pend.Add(1) 842 go func(peer SyncPeer, root common.Hash) { 843 defer s.pend.Done() 844 845 // Attempt to send the remote request and revert if it fails 846 if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, maxRequestSize); err != nil { 847 peer.Log().Debug("Failed to request account range", "err", err) 848 s.scheduleRevertAccountRequest(req) 849 } 850 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 851 852 // Inject the request into the task to block further assignments 853 task.req = req 854 } 855 } 856 857 // assignBytecodeTasks attempts to match idle peers to pending code retrievals. 858 func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { 859 s.lock.Lock() 860 defer s.lock.Unlock() 861 862 // If there are no idle peers, short circuit assignment 863 if len(s.bytecodeIdlers) == 0 { 864 return 865 } 866 // Iterate over all the tasks and try to find a pending one 867 for _, task := range s.tasks { 868 // Skip any tasks not in the bytecode retrieval phase 869 if task.res == nil { 870 continue 871 } 872 // Skip tasks that are already retrieving (or done with) all codes 873 if len(task.codeTasks) == 0 { 874 continue 875 } 876 // Task pending retrieval, try to find an idle peer. If no such peer 877 // exists, we probably assigned tasks for all (or they are stateless). 878 // Abort the entire assignment mechanism. 
879 var idle string 880 for id := range s.bytecodeIdlers { 881 // If the peer rejected a query in this sync cycle, don't bother asking 882 // again for anything, it's either out of sync or already pruned 883 if _, ok := s.statelessPeers[id]; ok { 884 continue 885 } 886 idle = id 887 break 888 } 889 if idle == "" { 890 return 891 } 892 // Matched a pending task to an idle peer, allocate a unique request id 893 var reqid uint64 894 for { 895 reqid = uint64(rand.Int63()) 896 if reqid == 0 { 897 continue 898 } 899 if _, ok := s.bytecodeReqs[reqid]; ok { 900 continue 901 } 902 break 903 } 904 // Generate the network query and send it to the peer 905 hashes := make([]common.Hash, 0, maxCodeRequestCount) 906 for hash := range task.codeTasks { 907 delete(task.codeTasks, hash) 908 hashes = append(hashes, hash) 909 if len(hashes) >= maxCodeRequestCount { 910 break 911 } 912 } 913 req := &bytecodeRequest{ 914 peer: idle, 915 id: reqid, 916 cancel: cancel, 917 stale: make(chan struct{}), 918 hashes: hashes, 919 task: task, 920 } 921 req.timeout = time.AfterFunc(requestTimeout, func() { 922 log.Debug("Bytecode request timed out") 923 s.scheduleRevertBytecodeRequest(req) 924 }) 925 s.bytecodeReqs[reqid] = req 926 delete(s.bytecodeIdlers, idle) 927 928 s.pend.Add(1) 929 go func(peer SyncPeer) { 930 defer s.pend.Done() 931 932 // Attempt to send the remote request and revert if it fails 933 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 934 log.Debug("Failed to request bytecodes", "err", err) 935 s.scheduleRevertBytecodeRequest(req) 936 } 937 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 938 } 939 } 940 941 // assignStorageTasks attempts to match idle peers to pending storage range 942 // retrievals. 943 func (s *Syncer) assignStorageTasks(cancel chan struct{}) { 944 s.lock.Lock() 945 defer s.lock.Unlock() 946 947 // If there are no idle peers, short circuit assignment 948 if len(s.storageIdlers) == 0 { 949 return 950 } 951 // Iterate over all the tasks and try to find a pending one 952 for _, task := range s.tasks { 953 // Skip any tasks not in the storage retrieval phase 954 if task.res == nil { 955 continue 956 } 957 // Skip tasks that are already retrieving (or done with) all small states 958 if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 { 959 continue 960 } 961 // Task pending retrieval, try to find an idle peer. If no such peer 962 // exists, we probably assigned tasks for all (or they are stateless). 963 // Abort the entire assignment mechanism. 964 var idle string 965 for id := range s.storageIdlers { 966 // If the peer rejected a query in this sync cycle, don't bother asking 967 // again for anything, it's either out of sync or already pruned 968 if _, ok := s.statelessPeers[id]; ok { 969 continue 970 } 971 idle = id 972 break 973 } 974 if idle == "" { 975 return 976 } 977 // Matched a pending task to an idle peer, allocate a unique request id 978 var reqid uint64 979 for { 980 reqid = uint64(rand.Int63()) 981 if reqid == 0 { 982 continue 983 } 984 if _, ok := s.storageReqs[reqid]; ok { 985 continue 986 } 987 break 988 } 989 // Generate the network query and send it to the peer. If there are 990 // large contract tasks pending, complete those before diving into 991 // even more new contracts. 
992 var ( 993 accounts = make([]common.Hash, 0, maxStorageSetRequestCount) 994 roots = make([]common.Hash, 0, maxStorageSetRequestCount) 995 subtask *storageTask 996 ) 997 for account, subtasks := range task.SubTasks { 998 for _, st := range subtasks { 999 // Skip any subtasks already filling 1000 if st.req != nil { 1001 continue 1002 } 1003 // Found an incomplete storage chunk, schedule it 1004 accounts = append(accounts, account) 1005 roots = append(roots, st.root) 1006 subtask = st 1007 break // Large contract chunks are downloaded individually 1008 } 1009 if subtask != nil { 1010 break // Large contract chunks are downloaded individually 1011 } 1012 } 1013 if subtask == nil { 1014 // No large contract required retrieval, but small ones available 1015 for acccount, root := range task.stateTasks { 1016 delete(task.stateTasks, acccount) 1017 1018 accounts = append(accounts, acccount) 1019 roots = append(roots, root) 1020 1021 if len(accounts) >= maxStorageSetRequestCount { 1022 break 1023 } 1024 } 1025 } 1026 // If nothing was found, it means this task is actually already fully 1027 // retrieving, but large contracts are hard to detect. Skip to the next. 1028 if len(accounts) == 0 { 1029 continue 1030 } 1031 req := &storageRequest{ 1032 peer: idle, 1033 id: reqid, 1034 cancel: cancel, 1035 stale: make(chan struct{}), 1036 accounts: accounts, 1037 roots: roots, 1038 mainTask: task, 1039 subTask: subtask, 1040 } 1041 if subtask != nil { 1042 req.origin = subtask.Next 1043 req.limit = subtask.Last 1044 } 1045 req.timeout = time.AfterFunc(requestTimeout, func() { 1046 log.Debug("Storage request timed out") 1047 s.scheduleRevertStorageRequest(req) 1048 }) 1049 s.storageReqs[reqid] = req 1050 delete(s.storageIdlers, idle) 1051 1052 s.pend.Add(1) 1053 go func(peer SyncPeer, root common.Hash) { 1054 defer s.pend.Done() 1055 1056 // Attempt to send the remote request and revert if it fails 1057 var origin, limit []byte 1058 if subtask != nil { 1059 origin, limit = req.origin[:], req.limit[:] 1060 } 1061 if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, maxRequestSize); err != nil { 1062 log.Debug("Failed to request storage", "err", err) 1063 s.scheduleRevertStorageRequest(req) 1064 } 1065 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1066 1067 // Inject the request into the subtask to block further assignments 1068 if subtask != nil { 1069 subtask.req = req 1070 } 1071 } 1072 } 1073 1074 // assignTrienodeHealTasks attempts to match idle peers to trie node requests to 1075 // heal any trie errors caused by the snap sync's chunked retrieval model. 1076 func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { 1077 s.lock.Lock() 1078 defer s.lock.Unlock() 1079 1080 // If there are no idle peers, short circuit assignment 1081 if len(s.trienodeHealIdlers) == 0 { 1082 return 1083 } 1084 // Iterate over pending tasks and try to find a peer to retrieve with 1085 for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1086 // If there are not enough trie tasks queued to fully assign, fill the 1087 // queue from the state sync scheduler. The trie synced schedules these 1088 // together with bytecodes, so we need to queue them combined. 
1089 var ( 1090 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1091 want = maxTrieRequestCount + maxCodeRequestCount 1092 ) 1093 if have < want { 1094 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1095 for i, hash := range nodes { 1096 s.healer.trieTasks[hash] = paths[i] 1097 } 1098 for _, hash := range codes { 1099 s.healer.codeTasks[hash] = struct{}{} 1100 } 1101 } 1102 // If all the heal tasks are bytecodes or already downloading, bail 1103 if len(s.healer.trieTasks) == 0 { 1104 return 1105 } 1106 // Task pending retrieval, try to find an idle peer. If no such peer 1107 // exists, we probably assigned tasks for all (or they are stateless). 1108 // Abort the entire assignment mechanism. 1109 var idle string 1110 for id := range s.trienodeHealIdlers { 1111 // If the peer rejected a query in this sync cycle, don't bother asking 1112 // again for anything, it's either out of sync or already pruned 1113 if _, ok := s.statelessPeers[id]; ok { 1114 continue 1115 } 1116 idle = id 1117 break 1118 } 1119 if idle == "" { 1120 return 1121 } 1122 // Matched a pending task to an idle peer, allocate a unique request id 1123 var reqid uint64 1124 for { 1125 reqid = uint64(rand.Int63()) 1126 if reqid == 0 { 1127 continue 1128 } 1129 if _, ok := s.trienodeHealReqs[reqid]; ok { 1130 continue 1131 } 1132 break 1133 } 1134 // Generate the network query and send it to the peer 1135 var ( 1136 hashes = make([]common.Hash, 0, maxTrieRequestCount) 1137 paths = make([]trie.SyncPath, 0, maxTrieRequestCount) 1138 pathsets = make([]TrieNodePathSet, 0, maxTrieRequestCount) 1139 ) 1140 for hash, pathset := range s.healer.trieTasks { 1141 delete(s.healer.trieTasks, hash) 1142 1143 hashes = append(hashes, hash) 1144 paths = append(paths, pathset) 1145 pathsets = append(pathsets, [][]byte(pathset)) // TODO(karalabe): group requests by account hash 1146 1147 if len(hashes) >= maxTrieRequestCount { 1148 break 1149 } 1150 } 1151 req := &trienodeHealRequest{ 1152 peer: idle, 1153 id: reqid, 1154 cancel: cancel, 1155 stale: make(chan struct{}), 1156 hashes: hashes, 1157 paths: paths, 1158 task: s.healer, 1159 } 1160 req.timeout = time.AfterFunc(requestTimeout, func() { 1161 log.Debug("Trienode heal request timed out") 1162 s.scheduleRevertTrienodeHealRequest(req) 1163 }) 1164 s.trienodeHealReqs[reqid] = req 1165 delete(s.trienodeHealIdlers, idle) 1166 1167 s.pend.Add(1) 1168 go func(peer SyncPeer, root common.Hash) { 1169 defer s.pend.Done() 1170 1171 // Attempt to send the remote request and revert if it fails 1172 if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil { 1173 log.Debug("Failed to request trienode healers", "err", err) 1174 s.scheduleRevertTrienodeHealRequest(req) 1175 } 1176 }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists 1177 } 1178 } 1179 1180 // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to 1181 // heal any trie errors caused by the snap sync's chunked retrieval model. 1182 func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { 1183 s.lock.Lock() 1184 defer s.lock.Unlock() 1185 1186 // If there are no idle peers, short circuit assignment 1187 if len(s.bytecodeHealIdlers) == 0 { 1188 return 1189 } 1190 // Iterate over pending tasks and try to find a peer to retrieve with 1191 for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { 1192 // If there are not enough trie tasks queued to fully assign, fill the 1193 // queue from the state sync scheduler. 
The trie synced schedules these 1194 // together with trie nodes, so we need to queue them combined. 1195 var ( 1196 have = len(s.healer.trieTasks) + len(s.healer.codeTasks) 1197 want = maxTrieRequestCount + maxCodeRequestCount 1198 ) 1199 if have < want { 1200 nodes, paths, codes := s.healer.scheduler.Missing(want - have) 1201 for i, hash := range nodes { 1202 s.healer.trieTasks[hash] = paths[i] 1203 } 1204 for _, hash := range codes { 1205 s.healer.codeTasks[hash] = struct{}{} 1206 } 1207 } 1208 // If all the heal tasks are trienodes or already downloading, bail 1209 if len(s.healer.codeTasks) == 0 { 1210 return 1211 } 1212 // Task pending retrieval, try to find an idle peer. If no such peer 1213 // exists, we probably assigned tasks for all (or they are stateless). 1214 // Abort the entire assignment mechanism. 1215 var idle string 1216 for id := range s.bytecodeHealIdlers { 1217 // If the peer rejected a query in this sync cycle, don't bother asking 1218 // again for anything, it's either out of sync or already pruned 1219 if _, ok := s.statelessPeers[id]; ok { 1220 continue 1221 } 1222 idle = id 1223 break 1224 } 1225 if idle == "" { 1226 return 1227 } 1228 // Matched a pending task to an idle peer, allocate a unique request id 1229 var reqid uint64 1230 for { 1231 reqid = uint64(rand.Int63()) 1232 if reqid == 0 { 1233 continue 1234 } 1235 if _, ok := s.bytecodeHealReqs[reqid]; ok { 1236 continue 1237 } 1238 break 1239 } 1240 // Generate the network query and send it to the peer 1241 hashes := make([]common.Hash, 0, maxCodeRequestCount) 1242 for hash := range s.healer.codeTasks { 1243 delete(s.healer.codeTasks, hash) 1244 1245 hashes = append(hashes, hash) 1246 if len(hashes) >= maxCodeRequestCount { 1247 break 1248 } 1249 } 1250 req := &bytecodeHealRequest{ 1251 peer: idle, 1252 id: reqid, 1253 cancel: cancel, 1254 stale: make(chan struct{}), 1255 hashes: hashes, 1256 task: s.healer, 1257 } 1258 req.timeout = time.AfterFunc(requestTimeout, func() { 1259 log.Debug("Bytecode heal request timed out") 1260 s.scheduleRevertBytecodeHealRequest(req) 1261 }) 1262 s.bytecodeHealReqs[reqid] = req 1263 delete(s.bytecodeHealIdlers, idle) 1264 1265 s.pend.Add(1) 1266 go func(peer SyncPeer) { 1267 defer s.pend.Done() 1268 1269 // Attempt to send the remote request and revert if it fails 1270 if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil { 1271 log.Debug("Failed to request bytecode healers", "err", err) 1272 s.scheduleRevertBytecodeHealRequest(req) 1273 } 1274 }(s.peers[idle]) // We're in the lock, peers[id] surely exists 1275 } 1276 } 1277 1278 // revertRequests locates all the currently pending reuqests from a particular 1279 // peer and reverts them, rescheduling for others to fulfill. 
1280 func (s *Syncer) revertRequests(peer string) { 1281 // Gather the requests first, revertals need the lock too 1282 s.lock.Lock() 1283 var accountReqs []*accountRequest 1284 for _, req := range s.accountReqs { 1285 if req.peer == peer { 1286 accountReqs = append(accountReqs, req) 1287 } 1288 } 1289 var bytecodeReqs []*bytecodeRequest 1290 for _, req := range s.bytecodeReqs { 1291 if req.peer == peer { 1292 bytecodeReqs = append(bytecodeReqs, req) 1293 } 1294 } 1295 var storageReqs []*storageRequest 1296 for _, req := range s.storageReqs { 1297 if req.peer == peer { 1298 storageReqs = append(storageReqs, req) 1299 } 1300 } 1301 var trienodeHealReqs []*trienodeHealRequest 1302 for _, req := range s.trienodeHealReqs { 1303 if req.peer == peer { 1304 trienodeHealReqs = append(trienodeHealReqs, req) 1305 } 1306 } 1307 var bytecodeHealReqs []*bytecodeHealRequest 1308 for _, req := range s.bytecodeHealReqs { 1309 if req.peer == peer { 1310 bytecodeHealReqs = append(bytecodeHealReqs, req) 1311 } 1312 } 1313 s.lock.Unlock() 1314 1315 // Revert all the requests matching the peer 1316 for _, req := range accountReqs { 1317 s.revertAccountRequest(req) 1318 } 1319 for _, req := range bytecodeReqs { 1320 s.revertBytecodeRequest(req) 1321 } 1322 for _, req := range storageReqs { 1323 s.revertStorageRequest(req) 1324 } 1325 for _, req := range trienodeHealReqs { 1326 s.revertTrienodeHealRequest(req) 1327 } 1328 for _, req := range bytecodeHealReqs { 1329 s.revertBytecodeHealRequest(req) 1330 } 1331 } 1332 1333 // scheduleRevertAccountRequest asks the event loop to clean up an account range 1334 // request and return all failed retrieval tasks to the scheduler for reassignment. 1335 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) { 1336 select { 1337 case s.accountReqFails <- req: 1338 // Sync event loop notified 1339 case <-req.cancel: 1340 // Sync cycle got cancelled 1341 case <-req.stale: 1342 // Request already reverted 1343 } 1344 } 1345 1346 // revertAccountRequest cleans up an account range request and returns all failed 1347 // retrieval tasks to the scheduler for reassignment. 1348 // 1349 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1350 // On peer threads, use scheduleRevertAccountRequest. 1351 func (s *Syncer) revertAccountRequest(req *accountRequest) { 1352 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id) 1353 select { 1354 case <-req.stale: 1355 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id) 1356 return 1357 default: 1358 } 1359 close(req.stale) 1360 1361 // Remove the request from the tracked set 1362 s.lock.Lock() 1363 delete(s.accountReqs, req.id) 1364 s.lock.Unlock() 1365 1366 // If there's a timeout timer still running, abort it and mark the account 1367 // task as not-pending, ready for resheduling 1368 req.timeout.Stop() 1369 if req.task.req == req { 1370 req.task.req = nil 1371 } 1372 } 1373 1374 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request 1375 // and return all failed retrieval tasks to the scheduler for reassignment. 
1376 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1377 select { 1378 case s.bytecodeReqFails <- req: 1379 // Sync event loop notified 1380 case <-req.cancel: 1381 // Sync cycle got cancelled 1382 case <-req.stale: 1383 // Request already reverted 1384 } 1385 } 1386 1387 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1388 // retrieval tasks to the scheduler for reassignment. 1389 // 1390 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1391 // On peer threads, use scheduleRevertBytecodeRequest. 1392 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1393 log.Debug("Reverting bytecode request", "peer", req.peer) 1394 select { 1395 case <-req.stale: 1396 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1397 return 1398 default: 1399 } 1400 close(req.stale) 1401 1402 // Remove the request from the tracked set 1403 s.lock.Lock() 1404 delete(s.bytecodeReqs, req.id) 1405 s.lock.Unlock() 1406 1407 // If there's a timeout timer still running, abort it and mark the code 1408 // retrievals as not-pending, ready for resheduling 1409 req.timeout.Stop() 1410 for _, hash := range req.hashes { 1411 req.task.codeTasks[hash] = struct{}{} 1412 } 1413 } 1414 1415 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1416 // request and return all failed retrieval tasks to the scheduler for reassignment. 1417 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1418 select { 1419 case s.storageReqFails <- req: 1420 // Sync event loop notified 1421 case <-req.cancel: 1422 // Sync cycle got cancelled 1423 case <-req.stale: 1424 // Request already reverted 1425 } 1426 } 1427 1428 // revertStorageRequest cleans up a storage range request and returns all failed 1429 // retrieval tasks to the scheduler for reassignment. 1430 // 1431 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1432 // On peer threads, use scheduleRevertStorageRequest. 1433 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1434 log.Debug("Reverting storage request", "peer", req.peer) 1435 select { 1436 case <-req.stale: 1437 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1438 return 1439 default: 1440 } 1441 close(req.stale) 1442 1443 // Remove the request from the tracked set 1444 s.lock.Lock() 1445 delete(s.storageReqs, req.id) 1446 s.lock.Unlock() 1447 1448 // If there's a timeout timer still running, abort it and mark the storage 1449 // task as not-pending, ready for resheduling 1450 req.timeout.Stop() 1451 if req.subTask != nil { 1452 req.subTask.req = nil 1453 } else { 1454 for i, account := range req.accounts { 1455 req.mainTask.stateTasks[account] = req.roots[i] 1456 } 1457 } 1458 } 1459 1460 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1461 // request and return all failed retrieval tasks to the scheduler for reassignment. 1462 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1463 select { 1464 case s.trienodeHealReqFails <- req: 1465 // Sync event loop notified 1466 case <-req.cancel: 1467 // Sync cycle got cancelled 1468 case <-req.stale: 1469 // Request already reverted 1470 } 1471 } 1472 1473 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1474 // failed retrieval tasks to the scheduler for reassignment. 
1475 // 1476 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1477 // On peer threads, use scheduleRevertTrienodeHealRequest. 1478 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) { 1479 log.Debug("Reverting trienode heal request", "peer", req.peer) 1480 select { 1481 case <-req.stale: 1482 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id) 1483 return 1484 default: 1485 } 1486 close(req.stale) 1487 1488 // Remove the request from the tracked set 1489 s.lock.Lock() 1490 delete(s.trienodeHealReqs, req.id) 1491 s.lock.Unlock() 1492 1493 // If there's a timeout timer still running, abort it and mark the trie node 1494 // retrievals as not-pending, ready for resheduling 1495 req.timeout.Stop() 1496 for i, hash := range req.hashes { 1497 req.task.trieTasks[hash] = req.paths[i] 1498 } 1499 } 1500 1501 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal 1502 // request and return all failed retrieval tasks to the scheduler for reassignment. 1503 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) { 1504 select { 1505 case s.bytecodeHealReqFails <- req: 1506 // Sync event loop notified 1507 case <-req.cancel: 1508 // Sync cycle got cancelled 1509 case <-req.stale: 1510 // Request already reverted 1511 } 1512 } 1513 1514 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all 1515 // failed retrieval tasks to the scheduler for reassignment. 1516 // 1517 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1518 // On peer threads, use scheduleRevertBytecodeHealRequest. 1519 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) { 1520 log.Debug("Reverting bytecode heal request", "peer", req.peer) 1521 select { 1522 case <-req.stale: 1523 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id) 1524 return 1525 default: 1526 } 1527 close(req.stale) 1528 1529 // Remove the request from the tracked set 1530 s.lock.Lock() 1531 delete(s.bytecodeHealReqs, req.id) 1532 s.lock.Unlock() 1533 1534 // If there's a timeout timer still running, abort it and mark the code 1535 // retrievals as not-pending, ready for resheduling 1536 req.timeout.Stop() 1537 for _, hash := range req.hashes { 1538 req.task.codeTasks[hash] = struct{}{} 1539 } 1540 } 1541 1542 // processAccountResponse integrates an already validated account range response 1543 // into the account tasks. 1544 func (s *Syncer) processAccountResponse(res *accountResponse) { 1545 // Switch the task from pending to filling 1546 res.task.req = nil 1547 res.task.res = res 1548 1549 // Ensure that the response doesn't overflow into the subsequent task 1550 last := res.task.Last.Big() 1551 for i, hash := range res.hashes { 1552 if hash.Big().Cmp(last) > 0 { 1553 // Chunk overflown, cut off excess, but also update the boundary nodes 1554 for j := i; j < len(res.hashes); j++ { 1555 if err := res.trie.Prove(res.hashes[j][:], 0, res.overflow); err != nil { 1556 panic(err) // Account range was already proven, what happened 1557 } 1558 } 1559 res.hashes = res.hashes[:i] 1560 res.accounts = res.accounts[:i] 1561 res.cont = false // Mark range completed 1562 break 1563 } 1564 } 1565 // Iterate over all the accounts and assemble which ones need further sub- 1566 // filling before the entire account range can be persisted. 
1567 res.task.needCode = make([]bool, len(res.accounts)) 1568 res.task.needState = make([]bool, len(res.accounts)) 1569 res.task.needHeal = make([]bool, len(res.accounts)) 1570 1571 res.task.codeTasks = make(map[common.Hash]struct{}) 1572 res.task.stateTasks = make(map[common.Hash]common.Hash) 1573 1574 resumed := make(map[common.Hash]struct{}) 1575 1576 res.task.pend = 0 1577 for i, account := range res.accounts { 1578 // Check if the account is a contract with an unknown code 1579 if !bytes.Equal(account.CodeHash, emptyCode[:]) { 1580 if code := rawdb.ReadCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)); code == nil { 1581 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{} 1582 res.task.needCode[i] = true 1583 res.task.pend++ 1584 } 1585 } 1586 // Check if the account is a contract with an unknown storage trie 1587 if account.Root != emptyRoot { 1588 if node, err := s.db.Get(account.Root[:]); err != nil || node == nil { 1589 // If there was a previous large state retrieval in progress, 1590 // don't restart it from scratch. This happens if a sync cycle 1591 // is interrupted and resumed later. However, *do* update the 1592 // previous root hash. 1593 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok { 1594 log.Error("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root) 1595 for _, subtask := range subtasks { 1596 subtask.root = account.Root 1597 } 1598 res.task.needHeal[i] = true 1599 resumed[res.hashes[i]] = struct{}{} 1600 } else { 1601 res.task.stateTasks[res.hashes[i]] = account.Root 1602 } 1603 res.task.needState[i] = true 1604 res.task.pend++ 1605 } 1606 } 1607 } 1608 // Delete any subtasks that have been aborted but not resumed. This may undo 1609 // some progress if a new peer gives us fewer accounts than an old one, but for 1610 // now we have to live with that. 1611 for hash := range res.task.SubTasks { 1612 if _, ok := resumed[hash]; !ok { 1613 log.Error("Aborting suspended storage retrieval", "account", hash) 1614 delete(res.task.SubTasks, hash) 1615 } 1616 } 1617 // If the account range contained no contracts, or all have been fully filled 1618 // beforehand, short circuit storage filling and forward to the next task 1619 if res.task.pend == 0 { 1620 s.forwardAccountTask(res.task) 1621 return 1622 } 1623 // Some accounts are incomplete, leave as is for the storage and contract 1624 // task assigners to pick up and fill. 1625 } 1626 1627 // processBytecodeResponse integrates an already validated bytecode response 1628 // into the account tasks. 
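// Hashes that were not delivered are pushed back into the task's codeTasks set so
// that a later assignment can retry them.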
1629 func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) { 1630 batch := s.db.NewBatch() 1631 1632 var ( 1633 codes uint64 1634 bytes common.StorageSize 1635 ) 1636 for i, hash := range res.hashes { 1637 code := res.codes[i] 1638 1639 // If the bytecode was not delivered, reschedule it 1640 if code == nil { 1641 res.task.codeTasks[hash] = struct{}{} 1642 continue 1643 } 1644 // Code was delivered, mark it not needed any more 1645 for j, account := range res.task.res.accounts { 1646 if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) { 1647 res.task.needCode[j] = false 1648 res.task.pend-- 1649 } 1650 } 1651 // Push the bytecode into a database batch 1652 s.bytecodeSynced++ 1653 s.bytecodeBytes += common.StorageSize(len(code)) 1654 1655 codes++ 1656 bytes += common.StorageSize(len(code)) 1657 1658 rawdb.WriteCode(batch, hash, code) 1659 s.bloom.Add(hash[:]) 1660 } 1661 if err := batch.Write(); err != nil { 1662 log.Crit("Failed to persist bytecodes", "err", err) 1663 } 1664 log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes) 1665 1666 // If this delivery completed the last pending task, forward the account task 1667 // to the next chunk 1668 if res.task.pend == 0 { 1669 s.forwardAccountTask(res.task) 1670 return 1671 } 1672 // Some accounts are still incomplete, leave as is for the storage and contract 1673 // task assigners to pick up and fill. 1674 } 1675 1676 // processStorageResponse integrates an already validated storage response 1677 // into the account tasks. 1678 func (s *Syncer) processStorageResponse(res *storageResponse) { 1679 // Switch the subtask from pending to idle 1680 if res.subTask != nil { 1681 res.subTask.req = nil 1682 } 1683 batch := s.db.NewBatch() 1684 1685 var ( 1686 slots int 1687 nodes int 1688 skipped int 1689 bytes common.StorageSize 1690 ) 1691 // Iterate over all the accounts and reconstruct their storage tries from the 1692 // delivered slots 1693 for i, account := range res.accounts { 1694 // If the account was not delivered, reschedule it 1695 if i >= len(res.hashes) { 1696 res.mainTask.stateTasks[account] = res.roots[i] 1697 continue 1698 } 1699 // State was delivered, if complete mark as not needed any more, otherwise 1700 // mark the account as needing healing 1701 for j, hash := range res.mainTask.res.hashes { 1702 if account != hash { 1703 continue 1704 } 1705 acc := res.mainTask.res.accounts[j] 1706 1707 // If the packet contains multiple contract storage slots, all 1708 // but the last are surely complete. The last contract may be 1709 // chunked, so check its continuation flag. 1710 if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { 1711 res.mainTask.needState[j] = false 1712 res.mainTask.pend-- 1713 } 1714 // If the last contract was chunked, mark it as needing healing 1715 // to avoid writing it out to disk prematurely. 
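// (The healing phase re-downloads any missing boundary trie nodes afterwards, so
// deferring the write of a chunked account here is safe.)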
1716 if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { 1717 res.mainTask.needHeal[j] = true 1718 } 1719 // If the last contract was chunked, we need to switch to large 1720 // contract handling mode 1721 if res.subTask == nil && i == len(res.hashes)-1 && res.cont { 1722 // If we haven't yet started a large-contract retrieval, create 1723 // the subtasks for it within the main account task 1724 if tasks, ok := res.mainTask.SubTasks[account]; !ok { 1725 var ( 1726 next common.Hash 1727 ) 1728 step := new(big.Int).Sub( 1729 new(big.Int).Div( 1730 new(big.Int).Exp(common.Big2, common.Big256, nil), 1731 big.NewInt(storageConcurrency), 1732 ), common.Big1, 1733 ) 1734 for k := 0; k < storageConcurrency; k++ { 1735 last := common.BigToHash(new(big.Int).Add(next.Big(), step)) 1736 if k == storageConcurrency-1 { 1737 // Make sure we don't overflow if the step is not a proper divisor 1738 last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") 1739 } 1740 tasks = append(tasks, &storageTask{ 1741 Next: next, 1742 Last: last, 1743 root: acc.Root, 1744 }) 1745 log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last) 1746 next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) 1747 } 1748 res.mainTask.SubTasks[account] = tasks 1749 1750 // Since we've just created the sub-tasks, this response 1751 // is surely for the first one (zero origin) 1752 res.subTask = tasks[0] 1753 } 1754 } 1755 // If we're in large contract delivery mode, forward the subtask 1756 if res.subTask != nil { 1757 // Ensure the response doesn't overflow into the subsequent task 1758 last := res.subTask.Last.Big() 1759 for k, hash := range res.hashes[i] { 1760 if hash.Big().Cmp(last) > 0 { 1761 // Chunk overflown, cut off excess, but also update the boundary 1762 for l := k; l < len(res.hashes[i]); l++ { 1763 if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil { 1764 panic(err) // Account range was already proven, what happened 1765 } 1766 } 1767 res.hashes[i] = res.hashes[i][:k] 1768 res.slots[i] = res.slots[i][:k] 1769 res.cont = false // Mark range completed 1770 break 1771 } 1772 } 1773 // Forward the relevant storage chunk (even if created just now) 1774 if res.cont { 1775 res.subTask.Next = common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1))) 1776 } else { 1777 res.subTask.done = true 1778 } 1779 } 1780 } 1781 // Iterate over all the reconstructed trie nodes and push them to disk 1782 slots += len(res.hashes[i]) 1783 1784 it := res.nodes[i].NewIterator(nil, nil) 1785 for it.Next() { 1786 // Boundary nodes are not written for the last result, since they are incomplete 1787 if i == len(res.hashes)-1 { 1788 if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok { 1789 skipped++ 1790 continue 1791 } 1792 } 1793 // Node is not a boundary, persist to disk 1794 batch.Put(it.Key(), it.Value()) 1795 s.bloom.Add(it.Key()) 1796 1797 bytes += common.StorageSize(common.HashLength + len(it.Value())) 1798 nodes++ 1799 } 1800 it.Release() 1801 } 1802 if err := batch.Write(); err != nil { 1803 log.Crit("Failed to persist storage slots", "err", err) 1804 } 1805 s.storageSynced += uint64(slots) 1806 s.storageBytes += bytes 1807 1808 log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "nodes", nodes, "skipped", skipped, "bytes", bytes) 1809 1810 // If this delivery completed the last pending task, forward 
the account task 1811 // to the next chunk 1812 if res.mainTask.pend == 0 { 1813 s.forwardAccountTask(res.mainTask) 1814 return 1815 } 1816 // Some accounts are still incomplete, leave as is for the storage and contract 1817 // task assigners to pick up and fill. 1818 } 1819 1820 // processTrienodeHealResponse integrates an already validated trienode response 1821 // into the healer tasks. 1822 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) { 1823 for i, hash := range res.hashes { 1824 node := res.nodes[i] 1825 1826 // If the trie node was not delivered, reschedule it 1827 if node == nil { 1828 res.task.trieTasks[hash] = res.paths[i] 1829 continue 1830 } 1831 // Push the trie node into the state syncer 1832 s.trienodeHealSynced++ 1833 s.trienodeHealBytes += common.StorageSize(len(node)) 1834 1835 err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) 1836 switch err { 1837 case nil: 1838 case trie.ErrAlreadyProcessed: 1839 s.trienodeHealDups++ 1840 case trie.ErrNotRequested: 1841 s.trienodeHealNops++ 1842 default: 1843 log.Error("Invalid trienode processed", "hash", hash, "err", err) 1844 } 1845 } 1846 batch := s.db.NewBatch() 1847 if err := s.healer.scheduler.Commit(batch); err != nil { 1848 log.Error("Failed to commit healing data", "err", err) 1849 } 1850 if err := batch.Write(); err != nil { 1851 log.Crit("Failed to persist healing data", "err", err) 1852 } 1853 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize())) 1854 } 1855 1856 // processBytecodeHealResponse integrates an already validated bytecode response 1857 // into the healer tasks. 1858 func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) { 1859 for i, hash := range res.hashes { 1860 node := res.codes[i] 1861 1862 // If the bytecode was not delivered, reschedule it 1863 if node == nil { 1864 res.task.codeTasks[hash] = struct{}{} 1865 continue 1866 } 1867 // Push the bytecode into the state syncer 1868 s.bytecodeHealSynced++ 1869 s.bytecodeHealBytes += common.StorageSize(len(node)) 1870 1871 err := s.healer.scheduler.Process(trie.SyncResult{Hash: hash, Data: node}) 1872 switch err { 1873 case nil: 1874 case trie.ErrAlreadyProcessed: 1875 s.bytecodeHealDups++ 1876 case trie.ErrNotRequested: 1877 s.bytecodeHealNops++ 1878 default: 1879 log.Error("Invalid bytecode processed", "hash", hash, "err", err) 1880 } 1881 } 1882 batch := s.db.NewBatch() 1883 if err := s.healer.scheduler.Commit(batch); err != nil { 1884 log.Error("Failed to commit healing data", "err", err) 1885 } 1886 if err := batch.Write(); err != nil { 1887 log.Crit("Failed to persist healing data", "err", err) 1888 } 1889 log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize())) 1890 } 1891 1892 // forwardAccountTask takes a filled account task and persists anything available 1893 // into the database, after which it forwards the next account marker so that the 1894 // task's next chunk may be filled. 1895 func (s *Syncer) forwardAccountTask(task *accountTask) { 1896 // Remove any pending delivery 1897 res := task.res 1898 if res == nil { 1899 return // nothing to forward 1900 } 1901 task.res = nil 1902 1903 // Iterate over all the accounts and gather all the incomplete trie nodes. A 1904 // node is incomplete if we haven't yet filled it (sync was interrupted), or 1905 // if we filled it in multiple chunks (storage trie), in which case the few 1906 // nodes on the chunk boundaries are missing. 
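// The Merkle path of every such incomplete account is collected into the 'incompletes'
// node set below; any trie node appearing in it is skipped when the batch is written.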
1907 incompletes := light.NewNodeSet() 1908 for i := range res.accounts { 1909 // If the filling was interrupted, mark everything after as incomplete 1910 if task.needCode[i] || task.needState[i] { 1911 for j := i; j < len(res.accounts); j++ { 1912 if err := res.trie.Prove(res.hashes[j][:], 0, incompletes); err != nil { 1913 panic(err) // Account range was already proven, what happened 1914 } 1915 } 1916 break 1917 } 1918 // Filling not interrupted until this point, mark incomplete if needs healing 1919 if task.needHeal[i] { 1920 if err := res.trie.Prove(res.hashes[i][:], 0, incompletes); err != nil { 1921 panic(err) // Account range was already proven, what happened 1922 } 1923 } 1924 } 1925 // Persist every finalized trie node that's not on the boundary 1926 batch := s.db.NewBatch() 1927 1928 var ( 1929 nodes int 1930 skipped int 1931 bytes common.StorageSize 1932 ) 1933 it := res.nodes.NewIterator(nil, nil) 1934 for it.Next() { 1935 // Boundary nodes are not written, since they are incomplete 1936 if _, ok := res.bounds[common.BytesToHash(it.Key())]; ok { 1937 skipped++ 1938 continue 1939 } 1940 // Overflow nodes are not written, since they mess with another task 1941 if _, err := res.overflow.Get(it.Key()); err == nil { 1942 skipped++ 1943 continue 1944 } 1945 // Accounts with split storage requests are incomplete 1946 if _, err := incompletes.Get(it.Key()); err == nil { 1947 skipped++ 1948 continue 1949 } 1950 // Node is neither a boundary nor an incomplete account, persist to disk 1951 batch.Put(it.Key(), it.Value()) 1952 s.bloom.Add(it.Key()) 1953 1954 bytes += common.StorageSize(common.HashLength + len(it.Value())) 1955 nodes++ 1956 } 1957 it.Release() 1958 1959 if err := batch.Write(); err != nil { 1960 log.Crit("Failed to persist accounts", "err", err) 1961 } 1962 s.accountBytes += bytes 1963 s.accountSynced += uint64(len(res.accounts)) 1964 1965 log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "nodes", nodes, "skipped", skipped, "bytes", bytes) 1966 1967 // Task filling persisted, push the chunk marker forward to the first 1968 // account still missing data. 1969 for i, hash := range res.hashes { 1970 if task.needCode[i] || task.needState[i] { 1971 return 1972 } 1973 task.Next = common.BigToHash(new(big.Int).Add(hash.Big(), big.NewInt(1))) 1974 } 1975 // All accounts marked as complete, track if the entire task is done 1976 task.done = !res.cont 1977 } 1978 1979 // OnAccounts is a callback method to invoke when a range of accounts is 1980 // received from a remote peer. 1981 func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { 1982 size := common.StorageSize(len(hashes) * common.HashLength) 1983 for _, account := range accounts { 1984 size += common.StorageSize(len(account)) 1985 } 1986 for _, node := range proof { 1987 size += common.StorageSize(len(node)) 1988 } 1989 logger := peer.Log().New("reqid", id) 1990 logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) 1991 1992 // Whether or not the response is valid, we can mark the peer as idle and 1993 // notify the scheduler to assign a new task. If the response is invalid, 1994 // we'll drop the peer in a bit. 
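// Note that the peer is only re-added to the idle set if it is still registered; a
// peer dropped concurrently must not be resurrected here.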
1995 s.lock.Lock() 1996 if _, ok := s.peers[peer.ID()]; ok { 1997 s.accountIdlers[peer.ID()] = struct{}{} 1998 } 1999 select { 2000 case s.update <- struct{}{}: 2001 default: 2002 } 2003 // Ensure the response is for a valid request 2004 req, ok := s.accountReqs[id] 2005 if !ok { 2006 // Request stale, perhaps the peer timed out but came through in the end 2007 logger.Warn("Unexpected account range packet") 2008 s.lock.Unlock() 2009 return nil 2010 } 2011 delete(s.accountReqs, id) 2012 2013 // Clean up the request timeout timer, we'll see how to proceed further based 2014 // on the actual delivered content 2015 if !req.timeout.Stop() { 2016 // The timeout is already triggered, and this request will be reverted+rescheduled 2017 s.lock.Unlock() 2018 return nil 2019 } 2020 2021 // Response is valid, but check if peer is signalling that it does not have 2022 // the requested data. For account range queries that means the state being 2023 // retrieved was either already pruned remotely, or the peer is not yet 2024 // synced to our head. 2025 if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { 2026 logger.Debug("Peer rejected account range request", "root", s.root) 2027 s.statelessPeers[peer.ID()] = struct{}{} 2028 s.lock.Unlock() 2029 2030 // Signal this request as failed, and ready for rescheduling 2031 s.scheduleRevertAccountRequest(req) 2032 return nil 2033 } 2034 root := s.root 2035 s.lock.Unlock() 2036 2037 // Reconstruct a partial trie from the response and verify it 2038 keys := make([][]byte, len(hashes)) 2039 for i, key := range hashes { 2040 keys[i] = common.CopyBytes(key[:]) 2041 } 2042 nodes := make(light.NodeList, len(proof)) 2043 for i, node := range proof { 2044 nodes[i] = node 2045 } 2046 proofdb := nodes.NodeSet() 2047 2048 var end []byte 2049 if len(keys) > 0 { 2050 end = keys[len(keys)-1] 2051 } 2052 db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb) 2053 if err != nil { 2054 logger.Warn("Account range failed proof", "err", err) 2055 // Signal this request as failed, and ready for rescheduling 2056 s.scheduleRevertAccountRequest(req) 2057 return err 2058 } 2059 // Partial trie reconstructed, send it to the scheduler for storage filling 2060 bounds := make(map[common.Hash]struct{}) 2061 2062 it := notary.Accessed().NewIterator(nil, nil) 2063 for it.Next() { 2064 bounds[common.BytesToHash(it.Key())] = struct{}{} 2065 } 2066 it.Release() 2067 2068 accs := make([]*state.Account, len(accounts)) 2069 for i, account := range accounts { 2070 acc := new(state.Account) 2071 if err := rlp.DecodeBytes(account, acc); err != nil { 2072 panic(err) // We created these blobs, we must be able to decode them 2073 } 2074 accs[i] = acc 2075 } 2076 response := &accountResponse{ 2077 task: req.task, 2078 hashes: hashes, 2079 accounts: accs, 2080 nodes: db, 2081 trie: tr, 2082 bounds: bounds, 2083 overflow: light.NewNodeSet(), 2084 cont: cont, 2085 } 2086 select { 2087 case s.accountResps <- response: 2088 case <-req.cancel: 2089 case <-req.stale: 2090 } 2091 return nil 2092 } 2093 2094 // OnByteCodes is a callback method to invoke when a batch of contract 2095 // bytecodes is received from a remote peer. 
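// Depending on whether the syncer is still in the bulk sync phase or already healing,
// the delivery is routed to onByteCodes or onHealByteCodes respectively.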
2096 func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2097 s.lock.RLock() 2098 syncing := !s.snapped 2099 s.lock.RUnlock() 2100 2101 if syncing { 2102 return s.onByteCodes(peer, id, bytecodes) 2103 } 2104 return s.onHealByteCodes(peer, id, bytecodes) 2105 } 2106 2107 // onByteCodes is a callback method to invoke when a batch of contract 2108 // bytecodes is received from a remote peer in the syncing phase. 2109 func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2110 var size common.StorageSize 2111 for _, code := range bytecodes { 2112 size += common.StorageSize(len(code)) 2113 } 2114 logger := peer.Log().New("reqid", id) 2115 logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2116 2117 // Whether or not the response is valid, we can mark the peer as idle and 2118 // notify the scheduler to assign a new task. If the response is invalid, 2119 // we'll drop the peer in a bit. 2120 s.lock.Lock() 2121 if _, ok := s.peers[peer.ID()]; ok { 2122 s.bytecodeIdlers[peer.ID()] = struct{}{} 2123 } 2124 select { 2125 case s.update <- struct{}{}: 2126 default: 2127 } 2128 // Ensure the response is for a valid request 2129 req, ok := s.bytecodeReqs[id] 2130 if !ok { 2131 // Request stale, perhaps the peer timed out but came through in the end 2132 logger.Warn("Unexpected bytecode packet") 2133 s.lock.Unlock() 2134 return nil 2135 } 2136 delete(s.bytecodeReqs, id) 2137 2138 // Clean up the request timeout timer, we'll see how to proceed further based 2139 // on the actual delivered content 2140 if !req.timeout.Stop() { 2141 // The timeout is already triggered, and this request will be reverted+rescheduled 2142 s.lock.Unlock() 2143 return nil 2144 } 2145 2146 // Response is valid, but check if peer is signalling that it does not have 2147 // the requested data. For bytecode range queries that means the peer is not 2148 // yet synced. 
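// An empty reply is not treated as a protocol violation: the peer is merely flagged as
// stateless for this cycle and the request is handed back to the scheduler.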
2149 if len(bytecodes) == 0 { 2150 logger.Debug("Peer rejected bytecode request") 2151 s.statelessPeers[peer.ID()] = struct{}{} 2152 s.lock.Unlock() 2153 2154 // Signal this request as failed, and ready for rescheduling 2155 s.scheduleRevertBytecodeRequest(req) 2156 return nil 2157 } 2158 s.lock.Unlock() 2159 2160 // Cross reference the requested bytecodes with the response to find gaps 2161 // that the serving node is missing 2162 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2163 hash := make([]byte, 32) 2164 2165 codes := make([][]byte, len(req.hashes)) 2166 for i, j := 0, 0; i < len(bytecodes); i++ { 2167 // Find the next hash that we've been served, leaving misses with nils 2168 hasher.Reset() 2169 hasher.Write(bytecodes[i]) 2170 hasher.Read(hash) 2171 2172 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2173 j++ 2174 } 2175 if j < len(req.hashes) { 2176 codes[j] = bytecodes[i] 2177 j++ 2178 continue 2179 } 2180 // We've either run out of hashes, or got unrequested data 2181 logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i) 2182 // Signal this request as failed, and ready for rescheduling 2183 s.scheduleRevertBytecodeRequest(req) 2184 return errors.New("unexpected bytecode") 2185 } 2186 // Response validated, send it to the scheduler for filling 2187 response := &bytecodeResponse{ 2188 task: req.task, 2189 hashes: req.hashes, 2190 codes: codes, 2191 } 2192 select { 2193 case s.bytecodeResps <- response: 2194 case <-req.cancel: 2195 case <-req.stale: 2196 } 2197 return nil 2198 } 2199 2200 // OnStorage is a callback method to invoke when ranges of storage slots 2201 // are received from a remote peer. 2202 func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { 2203 // Gather some trace stats to aid in debugging issues 2204 var ( 2205 hashCount int 2206 slotCount int 2207 size common.StorageSize 2208 ) 2209 for _, hashset := range hashes { 2210 size += common.StorageSize(common.HashLength * len(hashset)) 2211 hashCount += len(hashset) 2212 } 2213 for _, slotset := range slots { 2214 for _, slot := range slotset { 2215 size += common.StorageSize(len(slot)) 2216 } 2217 slotCount += len(slotset) 2218 } 2219 for _, node := range proof { 2220 size += common.StorageSize(len(node)) 2221 } 2222 logger := peer.Log().New("reqid", id) 2223 logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) 2224 2225 // Whether or not the response is valid, we can mark the peer as idle and 2226 // notify the scheduler to assign a new task. If the response is invalid, 2227 // we'll drop the peer in a bit. 
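// As with account deliveries, idleness is restored before validation so the scheduler
// can assign this peer new work even if the current response is later rejected.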
2228 s.lock.Lock() 2229 if _, ok := s.peers[peer.ID()]; ok { 2230 s.storageIdlers[peer.ID()] = struct{}{} 2231 } 2232 select { 2233 case s.update <- struct{}{}: 2234 default: 2235 } 2236 // Ensure the response is for a valid request 2237 req, ok := s.storageReqs[id] 2238 if !ok { 2239 // Request stale, perhaps the peer timed out but came through in the end 2240 logger.Warn("Unexpected storage ranges packet") 2241 s.lock.Unlock() 2242 return nil 2243 } 2244 delete(s.storageReqs, id) 2245 2246 // Clean up the request timeout timer, we'll see how to proceed further based 2247 // on the actual delivered content 2248 if !req.timeout.Stop() { 2249 // The timeout is already triggered, and this request will be reverted+rescheduled 2250 s.lock.Unlock() 2251 return nil 2252 } 2253 2254 // Reject the response if the hash sets and slot sets don't match, or if the 2255 // peer sent more data than requested. 2256 if len(hashes) != len(slots) { 2257 s.lock.Unlock() 2258 s.scheduleRevertStorageRequest(req) // reschedule request 2259 logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) 2260 return errors.New("hash and slot set size mismatch") 2261 } 2262 if len(hashes) > len(req.accounts) { 2263 s.lock.Unlock() 2264 s.scheduleRevertStorageRequest(req) // reschedule request 2265 logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) 2266 return errors.New("hash set larger than requested") 2267 } 2268 // Response is valid, but check if peer is signalling that it does not have 2269 // the requested data. For storage range queries that means the state being 2270 // retrieved was either already pruned remotely, or the peer is not yet 2271 // synced to our head. 2272 if len(hashes) == 0 { 2273 logger.Debug("Peer rejected storage request") 2274 s.statelessPeers[peer.ID()] = struct{}{} 2275 s.lock.Unlock() 2276 s.scheduleRevertStorageRequest(req) // reschedule request 2277 return nil 2278 } 2279 s.lock.Unlock() 2280 2281 // Reconstruct the partial tries from the response and verify them 2282 var ( 2283 dbs = make([]ethdb.KeyValueStore, len(hashes)) 2284 tries = make([]*trie.Trie, len(hashes)) 2285 notary *trie.KeyValueNotary 2286 cont bool 2287 ) 2288 for i := 0; i < len(hashes); i++ { 2289 // Convert the keys and proofs into an internal format 2290 keys := make([][]byte, len(hashes[i])) 2291 for j, key := range hashes[i] { 2292 keys[j] = common.CopyBytes(key[:]) 2293 } 2294 nodes := make(light.NodeList, 0, len(proof)) 2295 if i == len(hashes)-1 { 2296 for _, node := range proof { 2297 nodes = append(nodes, node) 2298 } 2299 } 2300 var err error 2301 if len(nodes) == 0 { 2302 // No proof has been attached, the response must cover the entire key 2303 // space and hash to the origin root. 
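// (With no proof attached, VerifyRangeProof reconstructs the trie from the delivered
// slots alone and checks that it hashes to the expected root.)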
2304 dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) 2305 if err != nil { 2306 s.scheduleRevertStorageRequest(req) // reschedule request 2307 logger.Warn("Storage slots failed proof", "err", err) 2308 return err 2309 } 2310 } else { 2311 // A proof was attached, the response is only partial, check that the 2312 // returned data is indeed part of the storage trie 2313 proofdb := nodes.NodeSet() 2314 2315 var end []byte 2316 if len(keys) > 0 { 2317 end = keys[len(keys)-1] 2318 } 2319 dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) 2320 if err != nil { 2321 s.scheduleRevertStorageRequest(req) // reschedule request 2322 logger.Warn("Storage range failed proof", "err", err) 2323 return err 2324 } 2325 } 2326 } 2327 // Partial tries reconstructed, send them to the scheduler for storage filling 2328 bounds := make(map[common.Hash]struct{}) 2329 2330 if notary != nil { // if all contract storages are delivered in full, no notary will be created 2331 it := notary.Accessed().NewIterator(nil, nil) 2332 for it.Next() { 2333 bounds[common.BytesToHash(it.Key())] = struct{}{} 2334 } 2335 it.Release() 2336 } 2337 response := &storageResponse{ 2338 mainTask: req.mainTask, 2339 subTask: req.subTask, 2340 accounts: req.accounts, 2341 roots: req.roots, 2342 hashes: hashes, 2343 slots: slots, 2344 nodes: dbs, 2345 tries: tries, 2346 bounds: bounds, 2347 overflow: light.NewNodeSet(), 2348 cont: cont, 2349 } 2350 select { 2351 case s.storageResps <- response: 2352 case <-req.cancel: 2353 case <-req.stale: 2354 } 2355 return nil 2356 } 2357 2358 // OnTrieNodes is a callback method to invoke when a batch of trie nodes 2359 // is received from a remote peer. 2360 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2361 var size common.StorageSize 2362 for _, node := range trienodes { 2363 size += common.StorageSize(len(node)) 2364 } 2365 logger := peer.Log().New("reqid", id) 2366 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2367 2368 // Whether or not the response is valid, we can mark the peer as idle and 2369 // notify the scheduler to assign a new task. If the response is invalid, 2370 // we'll drop the peer in a bit. 2371 s.lock.Lock() 2372 if _, ok := s.peers[peer.ID()]; ok { 2373 s.trienodeHealIdlers[peer.ID()] = struct{}{} 2374 } 2375 select { 2376 case s.update <- struct{}{}: 2377 default: 2378 } 2379 // Ensure the response is for a valid request 2380 req, ok := s.trienodeHealReqs[id] 2381 if !ok { 2382 // Request stale, perhaps the peer timed out but came through in the end 2383 logger.Warn("Unexpected trienode heal packet") 2384 s.lock.Unlock() 2385 return nil 2386 } 2387 delete(s.trienodeHealReqs, id) 2388 2389 // Clean up the request timeout timer, we'll see how to proceed further based 2390 // on the actual delivered content 2391 if !req.timeout.Stop() { 2392 // The timeout is already triggered, and this request will be reverted+rescheduled 2393 s.lock.Unlock() 2394 return nil 2395 } 2396 2397 // Response is valid, but check if peer is signalling that it does not have 2398 // the requested data. For trie node heal queries that means the peer is not 2399 // yet synced. 
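// Heal requests are keyed by exact node hashes, so any returned blob can be matched
// against the request below simply by re-hashing it.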
2400 if len(trienodes) == 0 { 2401 logger.Debug("Peer rejected trienode heal request") 2402 s.statelessPeers[peer.ID()] = struct{}{} 2403 s.lock.Unlock() 2404 2405 // Signal this request as failed, and ready for rescheduling 2406 s.scheduleRevertTrienodeHealRequest(req) 2407 return nil 2408 } 2409 s.lock.Unlock() 2410 2411 // Cross reference the requested trienodes with the response to find gaps 2412 // that the serving node is missing 2413 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2414 hash := make([]byte, 32) 2415 2416 nodes := make([][]byte, len(req.hashes)) 2417 for i, j := 0, 0; i < len(trienodes); i++ { 2418 // Find the next hash that we've been served, leaving misses with nils 2419 hasher.Reset() 2420 hasher.Write(trienodes[i]) 2421 hasher.Read(hash) 2422 2423 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2424 j++ 2425 } 2426 if j < len(req.hashes) { 2427 nodes[j] = trienodes[i] 2428 j++ 2429 continue 2430 } 2431 // We've either run out of hashes, or got unrequested data 2432 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) 2433 // Signal this request as failed, and ready for rescheduling 2434 s.scheduleRevertTrienodeHealRequest(req) 2435 return errors.New("unexpected healing trienode") 2436 } 2437 // Response validated, send it to the scheduler for filling 2438 response := &trienodeHealResponse{ 2439 task: req.task, 2440 hashes: req.hashes, 2441 paths: req.paths, 2442 nodes: nodes, 2443 } 2444 select { 2445 case s.trienodeHealResps <- response: 2446 case <-req.cancel: 2447 case <-req.stale: 2448 } 2449 return nil 2450 } 2451 2452 // onHealByteCodes is a callback method to invoke when a batch of contract 2453 // bytecodes is received from a remote peer in the healing phase. 2454 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { 2455 var size common.StorageSize 2456 for _, code := range bytecodes { 2457 size += common.StorageSize(len(code)) 2458 } 2459 logger := peer.Log().New("reqid", id) 2460 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) 2461 2462 // Whether or not the response is valid, we can mark the peer as idle and 2463 // notify the scheduler to assign a new task. If the response is invalid, 2464 // we'll drop the peer in a bit. 2465 s.lock.Lock() 2466 if _, ok := s.peers[peer.ID()]; ok { 2467 s.bytecodeHealIdlers[peer.ID()] = struct{}{} 2468 } 2469 select { 2470 case s.update <- struct{}{}: 2471 default: 2472 } 2473 // Ensure the response is for a valid request 2474 req, ok := s.bytecodeHealReqs[id] 2475 if !ok { 2476 // Request stale, perhaps the peer timed out but came through in the end 2477 logger.Warn("Unexpected bytecode heal packet") 2478 s.lock.Unlock() 2479 return nil 2480 } 2481 delete(s.bytecodeHealReqs, id) 2482 2483 // Clean up the request timeout timer, we'll see how to proceed further based 2484 // on the actual delivered content 2485 if !req.timeout.Stop() { 2486 // The timeout is already triggered, and this request will be reverted+rescheduled 2487 s.lock.Unlock() 2488 return nil 2489 } 2490 2491 // Response is valid, but check if peer is signalling that it does not have 2492 // the requested data. For bytecode range queries that means the peer is not 2493 // yet synced. 
2494 if len(bytecodes) == 0 { 2495 logger.Debug("Peer rejected bytecode heal request") 2496 s.statelessPeers[peer.ID()] = struct{}{} 2497 s.lock.Unlock() 2498 2499 // Signal this request as failed, and ready for rescheduling 2500 s.scheduleRevertBytecodeHealRequest(req) 2501 return nil 2502 } 2503 s.lock.Unlock() 2504 2505 // Cross reference the requested bytecodes with the response to find gaps 2506 // that the serving node is missing 2507 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState) 2508 hash := make([]byte, 32) 2509 2510 codes := make([][]byte, len(req.hashes)) 2511 for i, j := 0, 0; i < len(bytecodes); i++ { 2512 // Find the next hash that we've been served, leaving misses with nils 2513 hasher.Reset() 2514 hasher.Write(bytecodes[i]) 2515 hasher.Read(hash) 2516 2517 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) { 2518 j++ 2519 } 2520 if j < len(req.hashes) { 2521 codes[j] = bytecodes[i] 2522 j++ 2523 continue 2524 } 2525 // We've either run out of hashes, or got unrequested data 2526 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) 2527 // Signal this request as failed, and ready for rescheduling 2528 s.scheduleRevertBytecodeHealRequest(req) 2529 return errors.New("unexpected healing bytecode") 2530 } 2531 // Response validated, send it to the scheduler for filling 2532 response := &bytecodeHealResponse{ 2533 task: req.task, 2534 hashes: req.hashes, 2535 codes: codes, 2536 } 2537 select { 2538 case s.bytecodeHealResps <- response: 2539 case <-req.cancel: 2540 case <-req.stale: 2541 } 2542 return nil 2543 } 2544 2545 // hashSpace is the total size of the 256 bit hash space for accounts. 2546 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil) 2547 2548 // report calculates various status reports and provides them to the user. 2549 func (s *Syncer) report(force bool) { 2550 if len(s.tasks) > 0 { 2551 s.reportSyncProgress(force) 2552 return 2553 } 2554 s.reportHealProgress(force) 2555 } 2556 2557 // reportSyncProgress calculates various status reports and provides them to the user. 
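// The completion estimate extrapolates the total state size from the fraction of the
// account hash space covered so far, so early figures are rough at best.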
2558 func (s *Syncer) reportSyncProgress(force bool) { 2559 // Don't report all the events, just occasionally 2560 if !force && time.Since(s.logTime) < 3*time.Second { 2561 return 2562 } 2563 // Don't report anything until we have meaningful progress 2564 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes 2565 if synced == 0 { 2566 return 2567 } 2568 accountGaps := new(big.Int) 2569 for _, task := range s.tasks { 2570 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big())) 2571 } 2572 accountFills := new(big.Int).Sub(hashSpace, accountGaps) 2573 if accountFills.BitLen() == 0 { 2574 return 2575 } 2576 s.logTime = time.Now() 2577 estBytes := float64(new(big.Int).Div( 2578 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace), 2579 accountFills, 2580 ).Uint64()) 2581 2582 elapsed := time.Since(s.startTime) 2583 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes) 2584 2585 // Create a mega progress report 2586 var ( 2587 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes) 2588 accounts = fmt.Sprintf("%d@%v", s.accountSynced, s.accountBytes.TerminalString()) 2589 storage = fmt.Sprintf("%d@%v", s.storageSynced, s.storageBytes.TerminalString()) 2590 bytecode = fmt.Sprintf("%d@%v", s.bytecodeSynced, s.bytecodeBytes.TerminalString()) 2591 ) 2592 log.Info("State sync in progress", "synced", progress, "state", synced, 2593 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed)) 2594 } 2595 2596 // reportHealProgress calculates various status reports and provides them to the user. 2597 func (s *Syncer) reportHealProgress(force bool) { 2598 // Don't report all the events, just occasionally 2599 if !force && time.Since(s.logTime) < 3*time.Second { 2600 return 2601 } 2602 s.logTime = time.Now() 2603 2604 // Create a mega progress report 2605 var ( 2606 trienode = fmt.Sprintf("%d@%v", s.trienodeHealSynced, s.trienodeHealBytes.TerminalString()) 2607 bytecode = fmt.Sprintf("%d@%v", s.bytecodeHealSynced, s.bytecodeHealBytes.TerminalString()) 2608 ) 2609 log.Info("State heal in progress", "nodes", trienode, "codes", bytecode, 2610 "pending", s.healer.scheduler.Pending()) 2611 }