github.com/MikyChow/arbitrum-go-ethereum@v0.0.0-20230306102812-078da49636de/eth/protocols/snap/sync.go

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package snap

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	gomath "math"
	"math/big"
	"math/rand"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/MikyChow/arbitrum-go-ethereum/common"
	"github.com/MikyChow/arbitrum-go-ethereum/common/math"
	"github.com/MikyChow/arbitrum-go-ethereum/core/rawdb"
	"github.com/MikyChow/arbitrum-go-ethereum/core/state"
	"github.com/MikyChow/arbitrum-go-ethereum/core/state/snapshot"
	"github.com/MikyChow/arbitrum-go-ethereum/core/types"
	"github.com/MikyChow/arbitrum-go-ethereum/crypto"
	"github.com/MikyChow/arbitrum-go-ethereum/ethdb"
	"github.com/MikyChow/arbitrum-go-ethereum/event"
	"github.com/MikyChow/arbitrum-go-ethereum/light"
	"github.com/MikyChow/arbitrum-go-ethereum/log"
	"github.com/MikyChow/arbitrum-go-ethereum/p2p/msgrate"
	"github.com/MikyChow/arbitrum-go-ethereum/rlp"
	"github.com/MikyChow/arbitrum-go-ethereum/trie"
	"golang.org/x/crypto/sha3"
)

var (
	// emptyRoot is the known root hash of an empty trie.
	emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")

	// emptyCode is the known hash of the empty EVM bytecode.
	emptyCode = crypto.Keccak256Hash(nil)
)

const (
	// minRequestSize is the minimum number of bytes to request from a remote peer.
	// This number is used as the low cap for account and storage range requests.
	// Bytecode and trienode are limited inherently by item count (1).
	minRequestSize = 64 * 1024

	// maxRequestSize is the maximum number of bytes to request from a remote peer.
	// This number is used as the high cap for account and storage range requests.
	// Bytecode and trienode are limited more explicitly by the caps below.
	maxRequestSize = 512 * 1024

	// maxCodeRequestCount is the maximum number of bytecode blobs to request in a
	// single query. If this number is too low, we're not filling responses fully
	// and wasting round trip times. If it's too high, we're capping responses and
	// wasting bandwidth.
	//
	// Deployed bytecodes are currently capped at 24KB, so the minimum request
	// size should be maxRequestSize / 24K. Assuming that most contracts do not
	// come close to that, requesting 4x should be a good approximation.
	maxCodeRequestCount = maxRequestSize / (24 * 1024) * 4

	// maxTrieRequestCount is the maximum number of trie node blobs to request in
	// a single query. If this number is too low, we're not filling responses fully
	// and wasting round trip times.
	// If it's too high, we're capping responses and wasting bandwidth.
	maxTrieRequestCount = maxRequestSize / 512

	// trienodeHealRateMeasurementImpact is the impact a single measurement has on
	// the local node's trienode processing capacity. A value closer to 0 reacts
	// slower to sudden changes, but it is also more stable against temporary hiccups.
	trienodeHealRateMeasurementImpact = 0.005

	// minTrienodeHealThrottle is the minimum divisor for throttling trie node
	// heal requests to avoid overloading the local node and excessively expanding
	// the state trie breadth-wise.
	minTrienodeHealThrottle = 1

	// maxTrienodeHealThrottle is the maximum divisor for throttling trie node
	// heal requests to avoid overloading the local node and excessively expanding
	// the state trie breadth-wise.
	maxTrienodeHealThrottle = maxTrieRequestCount

	// trienodeHealThrottleIncrease is the multiplier for the throttle when the
	// rate of arriving data is higher than the rate of processing it.
	trienodeHealThrottleIncrease = 1.33

	// trienodeHealThrottleDecrease is the divisor for the throttle when the
	// rate of arriving data is lower than the rate of processing it.
	trienodeHealThrottleDecrease = 1.25
)

var (
	// accountConcurrency is the number of chunks to split the account trie into
	// to allow concurrent retrievals.
	accountConcurrency = 16

	// storageConcurrency is the number of chunks to split a large contract
	// storage trie into to allow concurrent retrievals.
	storageConcurrency = 16
)

// ErrCancelled is returned from snap syncing if the operation was prematurely
// terminated.
var ErrCancelled = errors.New("sync cancelled")

// accountRequest tracks a pending account range request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: account requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type accountRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *accountResponse // Channel to deliver successful response on
	revert  chan *accountRequest  // Channel to deliver request failure on
	cancel  chan struct{}         // Channel to track sync cancellation
	timeout *time.Timer           // Timer to track delivery timeout
	stale   chan struct{}         // Channel to signal the request was dropped

	origin common.Hash // First account requested to allow continuation checks
	limit  common.Hash // Last account requested to allow non-overlapping chunking

	task *accountTask // Task which this request is filling (only access fields through the runloop!!)
}

// accountResponse is an already Merkle-verified remote response to an account
// range request. It contains the subtrie for the requested account range and
// the database that's going to be filled with the internal nodes on commit.
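//
// As a rough sketch of what happens on the peer's thread before a response is
// accepted (illustrative names, not the exact handler code): the returned
// account range plus its border proof are checked against the requested root,
//
//	proofdb := nodes.NodeSet() // proof nodes received alongside the range
//	cont, err := trie.VerifyRangeProof(root, req.origin[:], end, hashes, accounts, proofdb)
//
// and only a range that proves correct is wrapped into an accountResponse.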
type accountResponse struct {
	task *accountTask // Task which this request is filling

	hashes   []common.Hash         // Account hashes in the returned range
	accounts []*types.StateAccount // Expanded accounts in the returned range

	cont bool // Whether the account range has a continuation
}

// bytecodeRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *bytecodeResponse // Channel to deliver successful response on
	revert  chan *bytecodeRequest  // Channel to deliver request failure on
	cancel  chan struct{}          // Channel to track sync cancellation
	timeout *time.Timer            // Timer to track delivery timeout
	stale   chan struct{}          // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *accountTask  // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeResponse is an already verified remote response to a bytecode request.
type bytecodeResponse struct {
	task *accountTask // Task which this request is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}

// storageRequest tracks a pending storage ranges request to ensure responses are
// to actual requests and to validate any security constraints.
//
// Concurrency note: storage requests and responses are handled concurrently from
// the main runloop to allow Merkle proof verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. tasks). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
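//
// Note that origin and limit are only populated for single-account requests
// targeting a large contract chunk (subTask != nil); multi-account requests
// implicitly cover the full slot range of each listed account.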
type storageRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *storageResponse // Channel to deliver successful response on
	revert  chan *storageRequest  // Channel to deliver request failure on
	cancel  chan struct{}         // Channel to track sync cancellation
	timeout *time.Timer           // Timer to track delivery timeout
	stale   chan struct{}         // Channel to signal the request was dropped

	accounts []common.Hash // Account hashes to validate responses
	roots    []common.Hash // Storage roots to validate responses

	origin common.Hash // First storage slot requested to allow continuation checks
	limit  common.Hash // Last storage slot requested to allow non-overlapping chunking

	mainTask *accountTask // Task which this response belongs to (only access fields through the runloop!!)
	subTask  *storageTask // Task which this response is filling (only access fields through the runloop!!)
}

// storageResponse is an already Merkle-verified remote response to a storage
// range request. It contains the subtries for the requested storage ranges and
// the databases that are going to be filled with the internal nodes on commit.
type storageResponse struct {
	mainTask *accountTask // Task which this response belongs to
	subTask  *storageTask // Task which this response is filling

	accounts []common.Hash // Account hashes requested, may be only partially filled
	roots    []common.Hash // Storage roots requested, may be only partially filled

	hashes [][]common.Hash // Storage slot hashes in the returned range
	slots  [][][]byte      // Storage slot values in the returned range

	cont bool // Whether the last storage range has a continuation
}

// trienodeHealRequest tracks a pending state trie request to ensure responses
// are to actual requests and to validate any security constraints.
//
// Concurrency note: trie node requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type trienodeHealRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *trienodeHealResponse // Channel to deliver successful response on
	revert  chan *trienodeHealRequest  // Channel to deliver request failure on
	cancel  chan struct{}              // Channel to track sync cancellation
	timeout *time.Timer                // Timer to track delivery timeout
	stale   chan struct{}              // Channel to signal the request was dropped

	paths  []string      // Trie node paths for identifying trie nodes
	hashes []common.Hash // Trie node hashes to validate responses

	task *healTask // Task which this request is filling (only access fields through the runloop!!)
}

// trienodeHealResponse is an already verified remote response to a trie node request.
type trienodeHealResponse struct {
	task *healTask // Task which this request is filling

	paths  []string      // Paths of the trie nodes
	hashes []common.Hash // Hashes of the trie nodes to avoid double hashing
	nodes  [][]byte      // Actual trie nodes to store into the database (nil = missing)
}

// bytecodeHealRequest tracks a pending bytecode request to ensure responses are to
// actual requests and to validate any security constraints.
//
// Concurrency note: bytecode requests and responses are handled concurrently from
// the main runloop to allow Keccak256 hash verifications on the peer's thread and
// to drop on invalid response. The request struct must contain all the data to
// construct the response without accessing runloop internals (i.e. task). That
// is only included to allow the runloop to match a response to the task being
// synced without having yet another set of maps.
type bytecodeHealRequest struct {
	peer string    // Peer to which this request is assigned
	id   uint64    // Request ID of this request
	time time.Time // Timestamp when the request was sent

	deliver chan *bytecodeHealResponse // Channel to deliver successful response on
	revert  chan *bytecodeHealRequest  // Channel to deliver request failure on
	cancel  chan struct{}              // Channel to track sync cancellation
	timeout *time.Timer                // Timer to track delivery timeout
	stale   chan struct{}              // Channel to signal the request was dropped

	hashes []common.Hash // Bytecode hashes to validate responses
	task   *healTask     // Task which this request is filling (only access fields through the runloop!!)
}

// bytecodeHealResponse is an already verified remote response to a bytecode request.
type bytecodeHealResponse struct {
	task *healTask // Task which this request is filling

	hashes []common.Hash // Hashes of the bytecode to avoid double hashing
	codes  [][]byte      // Actual bytecodes to store into the database (nil = missing)
}

// accountTask represents the sync task for a chunk of the account snapshot.
type accountTask struct {
	// These fields get serialized to leveldb on shutdown
	Next     common.Hash                    // Next account to sync in this interval
	Last     common.Hash                    // Last account to sync in this interval
	SubTasks map[common.Hash][]*storageTask // Storage intervals needing fetching for large contracts

	// These fields are internals used during runtime
	req  *accountRequest  // Pending request to fill this task
	res  *accountResponse // Validated response filling this task
	pend int              // Number of pending subtasks for this round

	needCode  []bool // Flags whether the filling accounts need code retrieval
	needState []bool // Flags whether the filling accounts need storage retrieval
	needHeal  []bool // Flags whether the filling accounts' state was chunked and needs healing

	codeTasks  map[common.Hash]struct{}    // Code hashes that need retrieval
	stateTasks map[common.Hash]common.Hash // Account hashes->roots that need full state retrieval

	genBatch ethdb.Batch     // Batch used by the node generator
	genTrie  *trie.StackTrie // Node generator from storage slots

	done bool // Flag whether the task can be removed
}

// storageTask represents the sync task for a chunk of the storage snapshot.
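// Analogous to the account trie, a contract storage trie that is too large to
// fetch in one request is split into storageConcurrency (by default 16) even
// chunks of the slot hash space, each one tracked by its own storageTask.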
type storageTask struct {
	Next common.Hash // Next storage slot to sync in this interval
	Last common.Hash // Last storage slot to sync in this interval

	// These fields are internals used during runtime
	root common.Hash     // Storage root hash for this instance
	req  *storageRequest // Pending request to fill this task

	genBatch ethdb.Batch     // Batch used by the node generator
	genTrie  *trie.StackTrie // Node generator from storage slots

	done bool // Flag whether the task can be removed
}

// healTask represents the sync task for healing the snap-synced chunk boundaries.
type healTask struct {
	scheduler *trie.Sync // State trie sync scheduler defining the tasks

	trieTasks map[string]common.Hash   // Set of trie node tasks currently queued for retrieval, indexed by node path
	codeTasks map[common.Hash]struct{} // Set of byte code tasks currently queued for retrieval, indexed by code hash
}

// SyncProgress is a database entry to allow suspending and resuming a snapshot state
// sync. As opposed to full and fast sync, there is no way to restart a suspended
// snap sync without prior knowledge of the suspension point.
type SyncProgress struct {
	Tasks []*accountTask // The suspended account tasks (contract tasks within)

	// Status report during syncing phase
	AccountSynced  uint64             // Number of accounts downloaded
	AccountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	BytecodeSynced uint64             // Number of bytecodes downloaded
	BytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	StorageSynced  uint64             // Number of storage slots downloaded
	StorageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	// Status report during healing phase
	TrienodeHealSynced uint64             // Number of state trie nodes downloaded
	TrienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	BytecodeHealSynced uint64             // Number of bytecodes downloaded
	BytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
}

// SyncPending is analogous to SyncProgress, but it's used to report on pending
// ephemeral sync progress that doesn't get persisted into the database.
type SyncPending struct {
	TrienodeHeal uint64 // Number of state trie nodes pending
	BytecodeHeal uint64 // Number of bytecodes pending
}

// SyncPeer abstracts out the methods required for a peer to be synced against
// with the goal of allowing the construction of mock peers without the full
// blown networking.
type SyncPeer interface {
	// ID retrieves the peer's unique identifier.
	ID() string

	// RequestAccountRange fetches a batch of accounts rooted in a specific account
	// trie, starting with the origin.
	RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error

	// RequestStorageRanges fetches a batch of storage slots belonging to one or
	// more accounts. If slots from only one account are requested, an origin marker
	// may also be used to retrieve from there.
	RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error

	// RequestByteCodes fetches a batch of bytecodes by hash.
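	// The bytes parameter is the soft response-size cap of the snap protocol:
	// the serving peer stops appending code blobs once the limit is crossed.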
	RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error

	// RequestTrieNodes fetches a batch of account or storage trie nodes rooted in
	// a specific state trie.
	RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error

	// Log retrieves the peer's own contextual logger.
	Log() log.Logger
}

// Syncer is an Ethereum account and storage trie syncer based on snapshots and
// the snap protocol. Its purpose is to download all the accounts and storage
// slots from remote peers and reassemble chunks of the state trie, on top of
// which a state sync can be run to fix any gaps / overlaps.
//
// Every network request has a variety of failure events:
//   - The peer disconnects after task assignment, failing to send the request
//   - The peer disconnects after sending the request, before delivering on it
//   - The peer remains connected, but does not deliver a response in time
//   - The peer delivers a stale response after a previous timeout
//   - The peer delivers a refusal to serve the requested state
type Syncer struct {
	db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup)

	root    common.Hash    // Current state trie root being synced
	tasks   []*accountTask // Current account task set being synced
	snapped bool           // Flag to signal that snap phase is done
	healer  *healTask      // Current state healing task being executed
	update  chan struct{}  // Notification channel for possible sync progression

	peers    map[string]SyncPeer // Currently active peers to download from
	peerJoin *event.Feed         // Event feed to react to peers joining
	peerDrop *event.Feed         // Event feed to react to peers dropping
	rates    *msgrate.Trackers   // Message throughput rates for peers

	// Request tracking during syncing phase
	statelessPeers map[string]struct{} // Peers that failed to deliver state data
	accountIdlers  map[string]struct{} // Peers that aren't serving account requests
	bytecodeIdlers map[string]struct{} // Peers that aren't serving bytecode requests
	storageIdlers  map[string]struct{} // Peers that aren't serving storage requests

	accountReqs  map[uint64]*accountRequest  // Account requests currently running
	bytecodeReqs map[uint64]*bytecodeRequest // Bytecode requests currently running
	storageReqs  map[uint64]*storageRequest  // Storage requests currently running

	accountSynced  uint64             // Number of accounts downloaded
	accountBytes   common.StorageSize // Number of account trie bytes persisted to disk
	bytecodeSynced uint64             // Number of bytecodes downloaded
	bytecodeBytes  common.StorageSize // Number of bytecode bytes downloaded
	storageSynced  uint64             // Number of storage slots downloaded
	storageBytes   common.StorageSize // Number of storage trie bytes persisted to disk

	extProgress *SyncProgress // Progress that can be exposed to the external caller
	// Request tracking during healing phase
	trienodeHealIdlers map[string]struct{} // Peers that aren't serving trie node requests
	bytecodeHealIdlers map[string]struct{} // Peers that aren't serving bytecode requests

	trienodeHealReqs map[uint64]*trienodeHealRequest // Trie node requests currently running
	bytecodeHealReqs map[uint64]*bytecodeHealRequest // Bytecode requests currently running

	trienodeHealRate      float64   // Average heal rate for processing trie node data
	trienodeHealPend      uint64    // Number of trie nodes currently pending for processing
	trienodeHealThrottle  float64   // Divisor for throttling the amount of trienode heal data requested
	trienodeHealThrottled time.Time // Timestamp the last time the throttle was updated

	trienodeHealSynced uint64             // Number of state trie nodes downloaded
	trienodeHealBytes  common.StorageSize // Number of state trie bytes persisted to disk
	trienodeHealDups   uint64             // Number of state trie nodes already processed
	trienodeHealNops   uint64             // Number of state trie nodes not requested
	bytecodeHealSynced uint64             // Number of bytecodes downloaded
	bytecodeHealBytes  common.StorageSize // Number of bytecodes persisted to disk
	bytecodeHealDups   uint64             // Number of bytecodes already processed
	bytecodeHealNops   uint64             // Number of bytecodes not requested

	stateWriter        ethdb.Batch        // Shared batch writer used for persisting raw states
	accountHealed      uint64             // Number of accounts downloaded during the healing stage
	accountHealedBytes common.StorageSize // Number of raw account bytes persisted to disk during the healing stage
	storageHealed      uint64             // Number of storage slots downloaded during the healing stage
	storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage

	startTime time.Time // Time instance when snapshot sync started
	logTime   time.Time // Time instance when status was last reported

	pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
	lock sync.RWMutex   // Protects fields that can change outside of sync (peers, reqs, root)
}

// NewSyncer creates a new snapshot syncer to download the Ethereum state over the
// snap protocol.
func NewSyncer(db ethdb.KeyValueStore) *Syncer {
	return &Syncer{
		db: db,

		peers:    make(map[string]SyncPeer),
		peerJoin: new(event.Feed),
		peerDrop: new(event.Feed),
		rates:    msgrate.NewTrackers(log.New("proto", "snap")),
		update:   make(chan struct{}, 1),

		accountIdlers:  make(map[string]struct{}),
		storageIdlers:  make(map[string]struct{}),
		bytecodeIdlers: make(map[string]struct{}),

		accountReqs:  make(map[uint64]*accountRequest),
		storageReqs:  make(map[uint64]*storageRequest),
		bytecodeReqs: make(map[uint64]*bytecodeRequest),

		trienodeHealIdlers: make(map[string]struct{}),
		bytecodeHealIdlers: make(map[string]struct{}),

		trienodeHealReqs:     make(map[uint64]*trienodeHealRequest),
		bytecodeHealReqs:     make(map[uint64]*bytecodeHealRequest),
		trienodeHealThrottle: maxTrienodeHealThrottle, // Tune downward instead of insta-filling with junk
		stateWriter:          db.NewBatch(),

		extProgress: new(SyncProgress),
	}
}

// Register injects a new data source into the syncer's peerset.
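//
// For orientation, the syncer is driven roughly as follows (a wiring sketch,
// the real driver lives in the downloader):
//
//	syncer := NewSyncer(db)
//	if err := syncer.Register(peer); err != nil { // peer implements SyncPeer
//		return err
//	}
//	cancel := make(chan struct{})
//	err := syncer.Sync(stateRoot, cancel)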
func (s *Syncer) Register(peer SyncPeer) error {
	// Make sure the peer is not registered yet
	id := peer.ID()

	s.lock.Lock()
	if _, ok := s.peers[id]; ok {
		log.Error("Snap peer already registered", "id", id)

		s.lock.Unlock()
		return errors.New("already registered")
	}
	s.peers[id] = peer
	s.rates.Track(id, msgrate.NewTracker(s.rates.MeanCapacities(), s.rates.MedianRoundTrip()))

	// Mark the peer as idle, even if no sync is running
	s.accountIdlers[id] = struct{}{}
	s.storageIdlers[id] = struct{}{}
	s.bytecodeIdlers[id] = struct{}{}
	s.trienodeHealIdlers[id] = struct{}{}
	s.bytecodeHealIdlers[id] = struct{}{}
	s.lock.Unlock()

	// Notify any active syncs that a new peer can be assigned data
	s.peerJoin.Send(id)
	return nil
}

// Unregister removes a data source from the syncer's peerset.
func (s *Syncer) Unregister(id string) error {
	// Remove all traces of the peer from the registry
	s.lock.Lock()
	if _, ok := s.peers[id]; !ok {
		log.Error("Snap peer not registered", "id", id)

		s.lock.Unlock()
		return errors.New("not registered")
	}
	delete(s.peers, id)
	s.rates.Untrack(id)

	// Remove status markers, even if no sync is running
	delete(s.statelessPeers, id)

	delete(s.accountIdlers, id)
	delete(s.storageIdlers, id)
	delete(s.bytecodeIdlers, id)
	delete(s.trienodeHealIdlers, id)
	delete(s.bytecodeHealIdlers, id)
	s.lock.Unlock()

	// Notify any active syncs that pending requests need to be reverted
	s.peerDrop.Send(id)
	return nil
}

// Sync starts (or resumes a previous) sync cycle to iterate over a state trie
// with the given root and reconstruct the nodes based on the snapshot leaves.
// Previously downloaded segments will not be redownloaded or fixed; rather, any
// errors will be healed after the leaves are fully accumulated.
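//
// Sync blocks until the cycle completes or the cancel channel is closed, in
// which case it returns ErrCancelled. A usage sketch (the shutdown channel is
// hypothetical):
//
//	cancel := make(chan struct{})
//	go func() {
//		<-shutdown
//		close(cancel)
//	}()
//	if err := s.Sync(root, cancel); err == ErrCancelled {
//		// stopped by the caller, not a failure
//	}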
func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
	// Move the trie root from any previous value, revert stateless markers for
	// any peers and initialize the syncer if it was not yet run
	s.lock.Lock()
	s.root = root
	s.healer = &healTask{
		scheduler: state.NewStateSync(root, s.db, s.onHealState),
		trieTasks: make(map[string]common.Hash),
		codeTasks: make(map[common.Hash]struct{}),
	}
	s.statelessPeers = make(map[string]struct{})
	s.lock.Unlock()

	if s.startTime == (time.Time{}) {
		s.startTime = time.Now()
	}
	// Retrieve the previous sync status from LevelDB and abort if already synced
	s.loadSyncStatus()
	if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
		log.Debug("Snapshot sync already completed")
		return nil
	}
	defer func() { // Persist any progress, independent of failure
		for _, task := range s.tasks {
			s.forwardAccountTask(task)
		}
		s.cleanAccountTasks()
		s.saveSyncStatus()
	}()

	log.Debug("Starting snapshot sync cycle", "root", root)

	// Flush out the last committed raw states
	defer func() {
		if s.stateWriter.ValueSize() > 0 {
			s.stateWriter.Write()
			s.stateWriter.Reset()
		}
	}()
	defer s.report(true)

	// Whether sync completed or not, disregard any future packets
	defer func() {
		log.Debug("Terminating snapshot sync cycle", "root", root)
		s.lock.Lock()
		s.accountReqs = make(map[uint64]*accountRequest)
		s.storageReqs = make(map[uint64]*storageRequest)
		s.bytecodeReqs = make(map[uint64]*bytecodeRequest)
		s.trienodeHealReqs = make(map[uint64]*trienodeHealRequest)
		s.bytecodeHealReqs = make(map[uint64]*bytecodeHealRequest)
		s.lock.Unlock()
	}()
	// Keep scheduling sync tasks
	peerJoin := make(chan string, 16)
	peerJoinSub := s.peerJoin.Subscribe(peerJoin)
	defer peerJoinSub.Unsubscribe()

	peerDrop := make(chan string, 16)
	peerDropSub := s.peerDrop.Subscribe(peerDrop)
	defer peerDropSub.Unsubscribe()

	// Create a set of unique channels for this sync cycle.
	// We need these to be ephemeral so a data race doesn't accidentally deliver
	// something stale on a persistent channel across syncs (yup, this happened)
	var (
		accountReqFails      = make(chan *accountRequest)
		storageReqFails      = make(chan *storageRequest)
		bytecodeReqFails     = make(chan *bytecodeRequest)
		accountResps         = make(chan *accountResponse)
		storageResps         = make(chan *storageResponse)
		bytecodeResps        = make(chan *bytecodeResponse)
		trienodeHealReqFails = make(chan *trienodeHealRequest)
		bytecodeHealReqFails = make(chan *bytecodeHealRequest)
		trienodeHealResps    = make(chan *trienodeHealResponse)
		bytecodeHealResps    = make(chan *bytecodeHealResponse)
	)
	for {
		// Remove all completed tasks and terminate sync if everything's done
		s.cleanStorageTasks()
		s.cleanAccountTasks()
		if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
			return nil
		}
		// Assign all the data retrieval tasks to any free peers
		s.assignAccountTasks(accountResps, accountReqFails, cancel)
		s.assignBytecodeTasks(bytecodeResps, bytecodeReqFails, cancel)
		s.assignStorageTasks(storageResps, storageReqFails, cancel)

		if len(s.tasks) == 0 {
			// Sync phase done, run heal phase
			s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
			s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
		}
		// Update sync progress
		s.lock.Lock()
		s.extProgress = &SyncProgress{
			AccountSynced:      s.accountSynced,
			AccountBytes:       s.accountBytes,
			BytecodeSynced:     s.bytecodeSynced,
			BytecodeBytes:      s.bytecodeBytes,
			StorageSynced:      s.storageSynced,
			StorageBytes:       s.storageBytes,
			TrienodeHealSynced: s.trienodeHealSynced,
			TrienodeHealBytes:  s.trienodeHealBytes,
			BytecodeHealSynced: s.bytecodeHealSynced,
			BytecodeHealBytes:  s.bytecodeHealBytes,
		}
		s.lock.Unlock()
		// Wait for something to happen
		select {
		case <-s.update:
			// Something happened (new peer, delivery, timeout), recheck tasks
		case <-peerJoin:
			// A new peer joined, try to schedule it new tasks
		case id := <-peerDrop:
			s.revertRequests(id)
		case <-cancel:
			return ErrCancelled

		case req := <-accountReqFails:
			s.revertAccountRequest(req)
		case req := <-bytecodeReqFails:
			s.revertBytecodeRequest(req)
		case req := <-storageReqFails:
			s.revertStorageRequest(req)
		case req := <-trienodeHealReqFails:
			s.revertTrienodeHealRequest(req)
		case req := <-bytecodeHealReqFails:
			s.revertBytecodeHealRequest(req)

		case res := <-accountResps:
			s.processAccountResponse(res)
		case res := <-bytecodeResps:
			s.processBytecodeResponse(res)
		case res := <-storageResps:
			s.processStorageResponse(res)
		case res := <-trienodeHealResps:
			s.processTrienodeHealResponse(res)
		case res := <-bytecodeHealResps:
			s.processBytecodeHealResponse(res)
		}
		// Report stats if something meaningful happened
		s.report(false)
	}
}

// loadSyncStatus retrieves a previously aborted sync status from the database,
// or generates a fresh one if none is available.
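//
// On a fresh start, the 256-bit account hash space is carved into even chunks:
// with the default accountConcurrency of 16, step = 2^256/16 - 1, so the first
// task covers 0x00...0 through 0x0ff...f, the second starts at 0x100...0, and
// the last one is clamped to 0xff...f to absorb any rounding shortfall.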
func (s *Syncer) loadSyncStatus() {
	var progress SyncProgress

	if status := rawdb.ReadSnapshotSyncStatus(s.db); status != nil {
		if err := json.Unmarshal(status, &progress); err != nil {
			log.Error("Failed to decode snap sync status", "err", err)
		} else {
			for _, task := range progress.Tasks {
				log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last)
			}
			s.tasks = progress.Tasks
			for _, task := range s.tasks {
				task.genBatch = ethdb.HookedBatch{
					Batch: s.db.NewBatch(),
					OnPut: func(key []byte, value []byte) {
						s.accountBytes += common.StorageSize(len(key) + len(value))
					},
				}
				task.genTrie = trie.NewStackTrie(task.genBatch)

				for accountHash, subtasks := range task.SubTasks {
					for _, subtask := range subtasks {
						subtask.genBatch = ethdb.HookedBatch{
							Batch: s.db.NewBatch(),
							OnPut: func(key []byte, value []byte) {
								s.storageBytes += common.StorageSize(len(key) + len(value))
							},
						}
						subtask.genTrie = trie.NewStackTrieWithOwner(subtask.genBatch, accountHash)
					}
				}
			}
			s.lock.Lock()
			defer s.lock.Unlock()

			s.snapped = len(s.tasks) == 0

			s.accountSynced = progress.AccountSynced
			s.accountBytes = progress.AccountBytes
			s.bytecodeSynced = progress.BytecodeSynced
			s.bytecodeBytes = progress.BytecodeBytes
			s.storageSynced = progress.StorageSynced
			s.storageBytes = progress.StorageBytes

			s.trienodeHealSynced = progress.TrienodeHealSynced
			s.trienodeHealBytes = progress.TrienodeHealBytes
			s.bytecodeHealSynced = progress.BytecodeHealSynced
			s.bytecodeHealBytes = progress.BytecodeHealBytes
			return
		}
	}
	// Either we've failed to decode the previous state, or there was none.
	// Start a fresh sync by chunking up the account range and scheduling
	// the chunks for retrieval.
	s.tasks = nil
	s.accountSynced, s.accountBytes = 0, 0
	s.bytecodeSynced, s.bytecodeBytes = 0, 0
	s.storageSynced, s.storageBytes = 0, 0
	s.trienodeHealSynced, s.trienodeHealBytes = 0, 0
	s.bytecodeHealSynced, s.bytecodeHealBytes = 0, 0

	var next common.Hash
	step := new(big.Int).Sub(
		new(big.Int).Div(
			new(big.Int).Exp(common.Big2, common.Big256, nil),
			big.NewInt(int64(accountConcurrency)),
		), common.Big1,
	)
	for i := 0; i < accountConcurrency; i++ {
		last := common.BigToHash(new(big.Int).Add(next.Big(), step))
		if i == accountConcurrency-1 {
			// Make sure we don't overflow if the step is not a proper divisor
			last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
		}
		batch := ethdb.HookedBatch{
			Batch: s.db.NewBatch(),
			OnPut: func(key []byte, value []byte) {
				s.accountBytes += common.StorageSize(len(key) + len(value))
			},
		}
		s.tasks = append(s.tasks, &accountTask{
			Next:     next,
			Last:     last,
			SubTasks: make(map[common.Hash][]*storageTask),
			genBatch: batch,
			genTrie:  trie.NewStackTrie(batch),
		})
		log.Debug("Created account sync task", "from", next, "last", last)
		next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1))
	}
}

// saveSyncStatus marshals the remaining sync tasks into leveldb.
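// Only the exported fields of accountTask and storageTask (Next, Last,
// SubTasks) survive the JSON round trip; runtime-only state such as batches,
// stack tries and pending requests is rebuilt by loadSyncStatus on resume.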
func (s *Syncer) saveSyncStatus() {
	// Serialize any partial progress to disk before spinning down
	for _, task := range s.tasks {
		if err := task.genBatch.Write(); err != nil {
			log.Error("Failed to persist account slots", "err", err)
		}
		for _, subtasks := range task.SubTasks {
			for _, subtask := range subtasks {
				if err := subtask.genBatch.Write(); err != nil {
					log.Error("Failed to persist storage slots", "err", err)
				}
			}
		}
	}
	// Store the actual progress markers
	progress := &SyncProgress{
		Tasks:              s.tasks,
		AccountSynced:      s.accountSynced,
		AccountBytes:       s.accountBytes,
		BytecodeSynced:     s.bytecodeSynced,
		BytecodeBytes:      s.bytecodeBytes,
		StorageSynced:      s.storageSynced,
		StorageBytes:       s.storageBytes,
		TrienodeHealSynced: s.trienodeHealSynced,
		TrienodeHealBytes:  s.trienodeHealBytes,
		BytecodeHealSynced: s.bytecodeHealSynced,
		BytecodeHealBytes:  s.bytecodeHealBytes,
	}
	status, err := json.Marshal(progress)
	if err != nil {
		panic(err) // This can only fail during implementation
	}
	rawdb.WriteSnapshotSyncStatus(s.db, status)
}

// Progress returns the snap sync status statistics.
func (s *Syncer) Progress() (*SyncProgress, *SyncPending) {
	s.lock.Lock()
	defer s.lock.Unlock()
	pending := new(SyncPending)
	if s.healer != nil {
		pending.TrienodeHeal = uint64(len(s.healer.trieTasks))
		pending.BytecodeHeal = uint64(len(s.healer.codeTasks))
	}
	return s.extProgress, pending
}

// cleanAccountTasks removes account range retrieval tasks that have already been
// completed.
func (s *Syncer) cleanAccountTasks() {
	// If the sync was already done before, don't even bother
	if len(s.tasks) == 0 {
		return
	}
	// Sync wasn't finished previously, check for any task that can be finalized
	for i := 0; i < len(s.tasks); i++ {
		if s.tasks[i].done {
			s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)
			i--
		}
	}
	// If everything was just finalized, generate the account trie and start heal
	if len(s.tasks) == 0 {
		s.lock.Lock()
		s.snapped = true
		s.lock.Unlock()

		// Push the final sync report
		s.reportSyncProgress(true)
	}
}

// cleanStorageTasks iterates over all the account tasks and storage sub-tasks
// within, cleaning any that have been completed.
func (s *Syncer) cleanStorageTasks() {
	for _, task := range s.tasks {
		for account, subtasks := range task.SubTasks {
			// Remove storage range retrieval tasks that completed
			for j := 0; j < len(subtasks); j++ {
				if subtasks[j].done {
					subtasks = append(subtasks[:j], subtasks[j+1:]...)
					j--
				}
			}
			if len(subtasks) > 0 {
				task.SubTasks[account] = subtasks
				continue
			}
			// If all storage chunks are done, mark the account as done too
			for j, hash := range task.res.hashes {
				if hash == account {
					task.needState[j] = false
				}
			}
			delete(task.SubTasks, account)
			task.pend--

			// If this was the last pending task, forward the account task
			if task.pend == 0 {
				s.forwardAccountTask(task)
			}
		}
	}
}

// assignAccountTasks attempts to match idle peers to pending account range
// retrievals.
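//
// All the assign* methods below share the same shape: snapshot the idle peers
// sorted by measured capacity, hand the fastest one to each pending task,
// allocate a random non-zero request id (retrying on collision), arm a timeout
// that reverts the request, and fire the network call from its own goroutine.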
func (s *Syncer) assignAccountTasks(success chan *accountResponse, fail chan *accountRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.accountIdlers)),
		caps: make([]int, 0, len(s.accountIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.accountIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, AccountRangeMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks already filling
		if task.req != nil || task.res != nil {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.accountReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		req := &accountRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			origin:  task.Next,
			limit:   task.Last,
			task:    task,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Account range request timed out", "reqid", reqid)
			s.rates.Update(idle, AccountRangeMsg, 0, 0)
			s.scheduleRevertAccountRequest(req)
		})
		s.accountReqs[reqid] = req
		delete(s.accountIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if cap > maxRequestSize {
				cap = maxRequestSize
			}
			if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
				cap = minRequestSize
			}
			if err := peer.RequestAccountRange(reqid, root, req.origin, req.limit, uint64(cap)); err != nil {
				peer.Log().Debug("Failed to request account range", "err", err)
				s.scheduleRevertAccountRequest(req)
			}
		}(s.root)

		// Inject the request into the task to block further assignments
		task.req = req
	}
}

// assignBytecodeTasks attempts to match idle peers to pending code retrievals.
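//
// Unlike the range requests, bytecode capacity is measured in items: the
// number of hashes per query is clamped to maxCodeRequestCount, while
// maxRequestSize still caps the size of the response itself.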
func (s *Syncer) assignBytecodeTasks(success chan *bytecodeResponse, fail chan *bytecodeRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.bytecodeIdlers)),
		caps: make([]int, 0, len(s.bytecodeIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.bytecodeIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks not in the bytecode retrieval phase
		if task.res == nil {
			continue
		}
		// Skip tasks that are already retrieving (or done with) all codes
		if len(task.codeTasks) == 0 {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.bytecodeReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxCodeRequestCount {
			cap = maxCodeRequestCount
		}
		hashes := make([]common.Hash, 0, cap)
		for hash := range task.codeTasks {
			delete(task.codeTasks, hash)
			hashes = append(hashes, hash)
			if len(hashes) >= cap {
				break
			}
		}
		req := &bytecodeRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			hashes:  hashes,
			task:    task,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Bytecode request timed out", "reqid", reqid)
			s.rates.Update(idle, ByteCodesMsg, 0, 0)
			s.scheduleRevertBytecodeRequest(req)
		})
		s.bytecodeReqs[reqid] = req
		delete(s.bytecodeIdlers, idle)

		s.pend.Add(1)
		go func() {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
				log.Debug("Failed to request bytecodes", "err", err)
				s.scheduleRevertBytecodeRequest(req)
			}
		}()
	}
}

// assignStorageTasks attempts to match idle peers to pending storage range
// retrievals.
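//
// The peer's byte capacity is converted into an account count by assuming
// roughly 1KiB of storage payload per small contract (storageSets = cap/1024);
// if a large-contract chunk is pending, it takes precedence and is fetched on
// its own instead.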
func (s *Syncer) assignStorageTasks(success chan *storageResponse, fail chan *storageRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.storageIdlers)),
		caps: make([]int, 0, len(s.storageIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.storageIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, StorageRangesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over all the tasks and try to find a pending one
	for _, task := range s.tasks {
		// Skip any tasks not in the storage retrieval phase
		if task.res == nil {
			continue
		}
		// Skip tasks that are already retrieving (or done with) all small states
		if len(task.SubTasks) == 0 && len(task.stateTasks) == 0 {
			continue
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.storageReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer. If there are
		// large contract tasks pending, complete those before diving into
		// even more new contracts.
		if cap > maxRequestSize {
			cap = maxRequestSize
		}
		if cap < minRequestSize { // Don't bother with peers below a bare minimum performance
			cap = minRequestSize
		}
		storageSets := cap / 1024

		var (
			accounts = make([]common.Hash, 0, storageSets)
			roots    = make([]common.Hash, 0, storageSets)
			subtask  *storageTask
		)
		for account, subtasks := range task.SubTasks {
			for _, st := range subtasks {
				// Skip any subtasks already filling
				if st.req != nil {
					continue
				}
				// Found an incomplete storage chunk, schedule it
				accounts = append(accounts, account)
				roots = append(roots, st.root)
				subtask = st
				break // Large contract chunks are downloaded individually
			}
			if subtask != nil {
				break // Large contract chunks are downloaded individually
			}
		}
		if subtask == nil {
			// No large contract requires retrieval, but small ones are available
			for account, root := range task.stateTasks {
				delete(task.stateTasks, account)

				accounts = append(accounts, account)
				roots = append(roots, root)

				if len(accounts) >= storageSets {
					break
				}
			}
		}
		// If nothing was found, it means this task is actually already fully
		// retrieving, but large contracts are hard to detect. Skip to the next.
		if len(accounts) == 0 {
			continue
		}
		req := &storageRequest{
			peer:     idle,
			id:       reqid,
			time:     time.Now(),
			deliver:  success,
			revert:   fail,
			cancel:   cancel,
			stale:    make(chan struct{}),
			accounts: accounts,
			roots:    roots,
			mainTask: task,
			subTask:  subtask,
		}
		if subtask != nil {
			req.origin = subtask.Next
			req.limit = subtask.Last
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Storage request timed out", "reqid", reqid)
			s.rates.Update(idle, StorageRangesMsg, 0, 0)
			s.scheduleRevertStorageRequest(req)
		})
		s.storageReqs[reqid] = req
		delete(s.storageIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			var origin, limit []byte
			if subtask != nil {
				origin, limit = req.origin[:], req.limit[:]
			}
			if err := peer.RequestStorageRanges(reqid, root, accounts, origin, limit, uint64(cap)); err != nil {
				log.Debug("Failed to request storage", "err", err)
				s.scheduleRevertStorageRequest(req)
			}
		}(s.root)

		// Inject the request into the subtask to block further assignments
		if subtask != nil {
			subtask.req = req
		}
	}
}

// assignTrienodeHealTasks attempts to match idle peers to trie node requests to
// heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignTrienodeHealTasks(success chan *trienodeHealResponse, fail chan *trienodeHealRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.trienodeHealIdlers)),
		caps: make([]int, 0, len(s.trienodeHealIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.trienodeHealIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, TrieNodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over pending tasks and try to find a peer to retrieve with
	for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 {
		// If there are not enough trie tasks queued to fully assign, fill the
		// queue from the state sync scheduler. The trie syncer schedules these
		// together with bytecodes, so we need to queue them combined.
		var (
			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
			want = maxTrieRequestCount + maxCodeRequestCount
		)
		if have < want {
			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
			for i, path := range paths {
				s.healer.trieTasks[path] = hashes[i]
			}
			for _, hash := range codes {
				s.healer.codeTasks[hash] = struct{}{}
			}
		}
		// If all the heal tasks are bytecodes or already downloading, bail
		if len(s.healer.trieTasks) == 0 {
			return
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.trienodeHealReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxTrieRequestCount {
			cap = maxTrieRequestCount
		}
		cap = int(float64(cap) / s.trienodeHealThrottle)
		if cap <= 0 {
			cap = 1
		}
		var (
			hashes   = make([]common.Hash, 0, cap)
			paths    = make([]string, 0, cap)
			pathsets = make([]TrieNodePathSet, 0, cap)
		)
		for path, hash := range s.healer.trieTasks {
			delete(s.healer.trieTasks, path)

			paths = append(paths, path)
			hashes = append(hashes, hash)
			if len(paths) >= cap {
				break
			}
		}
		// Group requests by account hash
		paths, hashes, _, pathsets = sortByAccountPath(paths, hashes)
		req := &trienodeHealRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			paths:   paths,
			hashes:  hashes,
			task:    s.healer,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Trienode heal request timed out", "reqid", reqid)
			s.rates.Update(idle, TrieNodesMsg, 0, 0)
			s.scheduleRevertTrienodeHealRequest(req)
		})
		s.trienodeHealReqs[reqid] = req
		delete(s.trienodeHealIdlers, idle)

		s.pend.Add(1)
		go func(root common.Hash) {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestTrieNodes(reqid, root, pathsets, maxRequestSize); err != nil {
				log.Debug("Failed to request trienode healers", "err", err)
				s.scheduleRevertTrienodeHealRequest(req)
			}
		}(s.root)
	}
}

// assignBytecodeHealTasks attempts to match idle peers to bytecode requests to
// heal any trie errors caused by the snap sync's chunked retrieval model.
func (s *Syncer) assignBytecodeHealTasks(success chan *bytecodeHealResponse, fail chan *bytecodeHealRequest, cancel chan struct{}) {
	s.lock.Lock()
	defer s.lock.Unlock()

	// Sort the peers by download capacity to use faster ones if many available
	idlers := &capacitySort{
		ids:  make([]string, 0, len(s.bytecodeHealIdlers)),
		caps: make([]int, 0, len(s.bytecodeHealIdlers)),
	}
	targetTTL := s.rates.TargetTimeout()
	for id := range s.bytecodeHealIdlers {
		if _, ok := s.statelessPeers[id]; ok {
			continue
		}
		idlers.ids = append(idlers.ids, id)
		idlers.caps = append(idlers.caps, s.rates.Capacity(id, ByteCodesMsg, targetTTL))
	}
	if len(idlers.ids) == 0 {
		return
	}
	sort.Sort(sort.Reverse(idlers))

	// Iterate over pending tasks and try to find a peer to retrieve with
	for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 {
		// If there are not enough trie tasks queued to fully assign, fill the
		// queue from the state sync scheduler. The trie syncer schedules these
		// together with trie nodes, so we need to queue them combined.
		var (
			have = len(s.healer.trieTasks) + len(s.healer.codeTasks)
			want = maxTrieRequestCount + maxCodeRequestCount
		)
		if have < want {
			paths, hashes, codes := s.healer.scheduler.Missing(want - have)
			for i, path := range paths {
				s.healer.trieTasks[path] = hashes[i]
			}
			for _, hash := range codes {
				s.healer.codeTasks[hash] = struct{}{}
			}
		}
		// If all the heal tasks are trienodes or already downloading, bail
		if len(s.healer.codeTasks) == 0 {
			return
		}
		// Task pending retrieval, try to find an idle peer. If no such peer
		// exists, we probably assigned tasks for all (or they are stateless).
		// Abort the entire assignment mechanism.
		if len(idlers.ids) == 0 {
			return
		}
		var (
			idle = idlers.ids[0]
			peer = s.peers[idle]
			cap  = idlers.caps[0]
		)
		idlers.ids, idlers.caps = idlers.ids[1:], idlers.caps[1:]

		// Matched a pending task to an idle peer, allocate a unique request id
		var reqid uint64
		for {
			reqid = uint64(rand.Int63())
			if reqid == 0 {
				continue
			}
			if _, ok := s.bytecodeHealReqs[reqid]; ok {
				continue
			}
			break
		}
		// Generate the network query and send it to the peer
		if cap > maxCodeRequestCount {
			cap = maxCodeRequestCount
		}
		hashes := make([]common.Hash, 0, cap)
		for hash := range s.healer.codeTasks {
			delete(s.healer.codeTasks, hash)

			hashes = append(hashes, hash)
			if len(hashes) >= cap {
				break
			}
		}
		req := &bytecodeHealRequest{
			peer:    idle,
			id:      reqid,
			time:    time.Now(),
			deliver: success,
			revert:  fail,
			cancel:  cancel,
			stale:   make(chan struct{}),
			hashes:  hashes,
			task:    s.healer,
		}
		req.timeout = time.AfterFunc(s.rates.TargetTimeout(), func() {
			peer.Log().Debug("Bytecode heal request timed out", "reqid", reqid)
			s.rates.Update(idle, ByteCodesMsg, 0, 0)
			s.scheduleRevertBytecodeHealRequest(req)
		})
		s.bytecodeHealReqs[reqid] = req
		delete(s.bytecodeHealIdlers, idle)

		s.pend.Add(1)
		go func() {
			defer s.pend.Done()

			// Attempt to send the remote request and revert if it fails
			if err := peer.RequestByteCodes(reqid, hashes, maxRequestSize); err != nil {
				log.Debug("Failed to request bytecode healers", "err", err)
				s.scheduleRevertBytecodeHealRequest(req)
			}
		}()
	}
}

// revertRequests locates all the currently pending requests from a particular
// peer and reverts them, rescheduling for others to fulfill.
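// The requests are gathered while holding the lock but reverted only after
// releasing it, since the individual revert* helpers re-acquire the same lock.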
1526 func (s *Syncer) revertRequests(peer string) {
1527 // Gather the requests first, as the reverts need the lock too
1528 s.lock.Lock()
1529 var accountReqs []*accountRequest
1530 for _, req := range s.accountReqs {
1531 if req.peer == peer {
1532 accountReqs = append(accountReqs, req)
1533 }
1534 }
1535 var bytecodeReqs []*bytecodeRequest
1536 for _, req := range s.bytecodeReqs {
1537 if req.peer == peer {
1538 bytecodeReqs = append(bytecodeReqs, req)
1539 }
1540 }
1541 var storageReqs []*storageRequest
1542 for _, req := range s.storageReqs {
1543 if req.peer == peer {
1544 storageReqs = append(storageReqs, req)
1545 }
1546 }
1547 var trienodeHealReqs []*trienodeHealRequest
1548 for _, req := range s.trienodeHealReqs {
1549 if req.peer == peer {
1550 trienodeHealReqs = append(trienodeHealReqs, req)
1551 }
1552 }
1553 var bytecodeHealReqs []*bytecodeHealRequest
1554 for _, req := range s.bytecodeHealReqs {
1555 if req.peer == peer {
1556 bytecodeHealReqs = append(bytecodeHealReqs, req)
1557 }
1558 }
1559 s.lock.Unlock()
1560
1561 // Revert all the requests matching the peer
1562 for _, req := range accountReqs {
1563 s.revertAccountRequest(req)
1564 }
1565 for _, req := range bytecodeReqs {
1566 s.revertBytecodeRequest(req)
1567 }
1568 for _, req := range storageReqs {
1569 s.revertStorageRequest(req)
1570 }
1571 for _, req := range trienodeHealReqs {
1572 s.revertTrienodeHealRequest(req)
1573 }
1574 for _, req := range bytecodeHealReqs {
1575 s.revertBytecodeHealRequest(req)
1576 }
1577 }
1578
1579 // scheduleRevertAccountRequest asks the event loop to clean up an account range
1580 // request and return all failed retrieval tasks to the scheduler for reassignment.
1581 func (s *Syncer) scheduleRevertAccountRequest(req *accountRequest) {
1582 select {
1583 case req.revert <- req:
1584 // Sync event loop notified
1585 case <-req.cancel:
1586 // Sync cycle got cancelled
1587 case <-req.stale:
1588 // Request already reverted
1589 }
1590 }
1591
1592 // revertAccountRequest cleans up an account range request and returns all failed
1593 // retrieval tasks to the scheduler for reassignment.
1594 //
1595 // Note, this needs to run on the event runloop thread to reschedule to idle peers.
1596 // On peer threads, use scheduleRevertAccountRequest.
1597 func (s *Syncer) revertAccountRequest(req *accountRequest) {
1598 log.Debug("Reverting account request", "peer", req.peer, "reqid", req.id)
1599 select {
1600 case <-req.stale:
1601 log.Trace("Account request already reverted", "peer", req.peer, "reqid", req.id)
1602 return
1603 default:
1604 }
1605 close(req.stale)
1606
1607 // Remove the request from the tracked set
1608 s.lock.Lock()
1609 delete(s.accountReqs, req.id)
1610 s.lock.Unlock()
1611
1612 // If there's a timeout timer still running, abort it and mark the account
1613 // task as not-pending, ready for rescheduling
1614 req.timeout.Stop()
1615 if req.task.req == req {
1616 req.task.req = nil
1617 }
1618 }
1619
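// The schedule/revert pairs in this file all share one notification idiom:
// peer-side threads never touch scheduler state directly, they only signal
// the runloop, and a per-request 'stale' channel is closed exactly once to
// make reverts idempotent. Below is a minimal, self-contained sketch of that
// idiom; the exampleRequest type and function names are hypothetical and not
// used by the syncer itself.

type exampleRequest struct {
	revert chan *exampleRequest // runloop listens here
	cancel chan struct{}        // closed when the sync cycle is torn down
	stale  chan struct{}        // closed once the request is finalized
}

// exampleScheduleRevert may run on any thread: it blocks until the runloop
// accepts the revert, or until the request is already dead.
func exampleScheduleRevert(req *exampleRequest) {
	select {
	case req.revert <- req: // sync event loop notified
	case <-req.cancel: // sync cycle got cancelled
	case <-req.stale: // request already reverted
	}
}

// exampleFinalize must only run on the runloop thread; the select with a
// default branch makes the close idempotent without needing a mutex.
func exampleFinalize(req *exampleRequest) {
	select {
	case <-req.stale:
		return // already finalized
	default:
	}
	close(req.stale)
}

1620 // scheduleRevertBytecodeRequest asks the event loop to clean up a bytecode request
1621 // and return all failed retrieval tasks to the scheduler for reassignment.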
1622 func (s *Syncer) scheduleRevertBytecodeRequest(req *bytecodeRequest) { 1623 select { 1624 case req.revert <- req: 1625 // Sync event loop notified 1626 case <-req.cancel: 1627 // Sync cycle got cancelled 1628 case <-req.stale: 1629 // Request already reverted 1630 } 1631 } 1632 1633 // revertBytecodeRequest cleans up a bytecode request and returns all failed 1634 // retrieval tasks to the scheduler for reassignment. 1635 // 1636 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1637 // On peer threads, use scheduleRevertBytecodeRequest. 1638 func (s *Syncer) revertBytecodeRequest(req *bytecodeRequest) { 1639 log.Debug("Reverting bytecode request", "peer", req.peer) 1640 select { 1641 case <-req.stale: 1642 log.Trace("Bytecode request already reverted", "peer", req.peer, "reqid", req.id) 1643 return 1644 default: 1645 } 1646 close(req.stale) 1647 1648 // Remove the request from the tracked set 1649 s.lock.Lock() 1650 delete(s.bytecodeReqs, req.id) 1651 s.lock.Unlock() 1652 1653 // If there's a timeout timer still running, abort it and mark the code 1654 // retrievals as not-pending, ready for rescheduling 1655 req.timeout.Stop() 1656 for _, hash := range req.hashes { 1657 req.task.codeTasks[hash] = struct{}{} 1658 } 1659 } 1660 1661 // scheduleRevertStorageRequest asks the event loop to clean up a storage range 1662 // request and return all failed retrieval tasks to the scheduler for reassignment. 1663 func (s *Syncer) scheduleRevertStorageRequest(req *storageRequest) { 1664 select { 1665 case req.revert <- req: 1666 // Sync event loop notified 1667 case <-req.cancel: 1668 // Sync cycle got cancelled 1669 case <-req.stale: 1670 // Request already reverted 1671 } 1672 } 1673 1674 // revertStorageRequest cleans up a storage range request and returns all failed 1675 // retrieval tasks to the scheduler for reassignment. 1676 // 1677 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 1678 // On peer threads, use scheduleRevertStorageRequest. 1679 func (s *Syncer) revertStorageRequest(req *storageRequest) { 1680 log.Debug("Reverting storage request", "peer", req.peer) 1681 select { 1682 case <-req.stale: 1683 log.Trace("Storage request already reverted", "peer", req.peer, "reqid", req.id) 1684 return 1685 default: 1686 } 1687 close(req.stale) 1688 1689 // Remove the request from the tracked set 1690 s.lock.Lock() 1691 delete(s.storageReqs, req.id) 1692 s.lock.Unlock() 1693 1694 // If there's a timeout timer still running, abort it and mark the storage 1695 // task as not-pending, ready for rescheduling 1696 req.timeout.Stop() 1697 if req.subTask != nil { 1698 req.subTask.req = nil 1699 } else { 1700 for i, account := range req.accounts { 1701 req.mainTask.stateTasks[account] = req.roots[i] 1702 } 1703 } 1704 } 1705 1706 // scheduleRevertTrienodeHealRequest asks the event loop to clean up a trienode heal 1707 // request and return all failed retrieval tasks to the scheduler for reassignment. 1708 func (s *Syncer) scheduleRevertTrienodeHealRequest(req *trienodeHealRequest) { 1709 select { 1710 case req.revert <- req: 1711 // Sync event loop notified 1712 case <-req.cancel: 1713 // Sync cycle got cancelled 1714 case <-req.stale: 1715 // Request already reverted 1716 } 1717 } 1718 1719 // revertTrienodeHealRequest cleans up a trienode heal request and returns all 1720 // failed retrieval tasks to the scheduler for reassignment. 1721 // 1722 // Note, this needs to run on the event runloop thread to reschedule to idle peers. 
1723 // On peer threads, use scheduleRevertTrienodeHealRequest.
1724 func (s *Syncer) revertTrienodeHealRequest(req *trienodeHealRequest) {
1725 log.Debug("Reverting trienode heal request", "peer", req.peer)
1726 select {
1727 case <-req.stale:
1728 log.Trace("Trienode heal request already reverted", "peer", req.peer, "reqid", req.id)
1729 return
1730 default:
1731 }
1732 close(req.stale)
1733
1734 // Remove the request from the tracked set
1735 s.lock.Lock()
1736 delete(s.trienodeHealReqs, req.id)
1737 s.lock.Unlock()
1738
1739 // If there's a timeout timer still running, abort it and mark the trie node
1740 // retrievals as not-pending, ready for rescheduling
1741 req.timeout.Stop()
1742 for i, path := range req.paths {
1743 req.task.trieTasks[path] = req.hashes[i]
1744 }
1745 }
1746
1747 // scheduleRevertBytecodeHealRequest asks the event loop to clean up a bytecode heal
1748 // request and return all failed retrieval tasks to the scheduler for reassignment.
1749 func (s *Syncer) scheduleRevertBytecodeHealRequest(req *bytecodeHealRequest) {
1750 select {
1751 case req.revert <- req:
1752 // Sync event loop notified
1753 case <-req.cancel:
1754 // Sync cycle got cancelled
1755 case <-req.stale:
1756 // Request already reverted
1757 }
1758 }
1759
1760 // revertBytecodeHealRequest cleans up a bytecode heal request and returns all
1761 // failed retrieval tasks to the scheduler for reassignment.
1762 //
1763 // Note, this needs to run on the event runloop thread to reschedule to idle peers.
1764 // On peer threads, use scheduleRevertBytecodeHealRequest.
1765 func (s *Syncer) revertBytecodeHealRequest(req *bytecodeHealRequest) {
1766 log.Debug("Reverting bytecode heal request", "peer", req.peer)
1767 select {
1768 case <-req.stale:
1769 log.Trace("Bytecode heal request already reverted", "peer", req.peer, "reqid", req.id)
1770 return
1771 default:
1772 }
1773 close(req.stale)
1774
1775 // Remove the request from the tracked set
1776 s.lock.Lock()
1777 delete(s.bytecodeHealReqs, req.id)
1778 s.lock.Unlock()
1779
1780 // If there's a timeout timer still running, abort it and mark the code
1781 // retrievals as not-pending, ready for rescheduling
1782 req.timeout.Stop()
1783 for _, hash := range req.hashes {
1784 req.task.codeTasks[hash] = struct{}{}
1785 }
1786 }
1787
1788 // processAccountResponse integrates an already validated account range response
1789 // into the account tasks.
1790 func (s *Syncer) processAccountResponse(res *accountResponse) {
1791 // Switch the task from pending to filling
1792 res.task.req = nil
1793 res.task.res = res
1794
1795 // Ensure that the response doesn't overflow into the subsequent task
1796 last := res.task.Last.Big()
1797 for i, hash := range res.hashes {
1798 // Mark the range complete if the last is already included.
1799 // Keep iterating to delete any extra states that exist.
1800 cmp := hash.Big().Cmp(last)
1801 if cmp == 0 {
1802 res.cont = false
1803 continue
1804 }
1805 if cmp > 0 {
1806 // Chunk overflown, cut off excess
1807 res.hashes = res.hashes[:i]
1808 res.accounts = res.accounts[:i]
1809 res.cont = false // Mark range completed
1810 break
1811 }
1812 }
1813 // Iterate over all the accounts and assemble which ones need further sub-
1814 // filling before the entire account range can be persisted.
1815 res.task.needCode = make([]bool, len(res.accounts))
1816 res.task.needState = make([]bool, len(res.accounts))
1817 res.task.needHeal = make([]bool, len(res.accounts))
1818
1819 res.task.codeTasks = make(map[common.Hash]struct{})
1820 res.task.stateTasks = make(map[common.Hash]common.Hash)
1821
1822 resumed := make(map[common.Hash]struct{})
1823
1824 res.task.pend = 0
1825 for i, account := range res.accounts {
1826 // Check if the account is a contract with an unknown code
1827 if !bytes.Equal(account.CodeHash, emptyCode[:]) {
1828 if !rawdb.HasCodeWithPrefix(s.db, common.BytesToHash(account.CodeHash)) {
1829 res.task.codeTasks[common.BytesToHash(account.CodeHash)] = struct{}{}
1830 res.task.needCode[i] = true
1831 res.task.pend++
1832 }
1833 }
1834 // Check if the account is a contract with an unknown storage trie
1835 if account.Root != emptyRoot {
1836 if ok, err := s.db.Has(account.Root[:]); err != nil || !ok {
1837 // If there was a previous large state retrieval in progress,
1838 // don't restart it from scratch. This happens if a sync cycle
1839 // is interrupted and resumed later. However, *do* update the
1840 // previous root hash.
1841 if subtasks, ok := res.task.SubTasks[res.hashes[i]]; ok {
1842 log.Debug("Resuming large storage retrieval", "account", res.hashes[i], "root", account.Root)
1843 for _, subtask := range subtasks {
1844 subtask.root = account.Root
1845 }
1846 res.task.needHeal[i] = true
1847 resumed[res.hashes[i]] = struct{}{}
1848 } else {
1849 res.task.stateTasks[res.hashes[i]] = account.Root
1850 }
1851 res.task.needState[i] = true
1852 res.task.pend++
1853 }
1854 }
1855 }
1856 // Delete any subtasks that have been aborted but not resumed. This may undo
1857 // some progress if a new peer gives us fewer accounts than an old one, but for
1858 // now we have to live with that.
1859 for hash := range res.task.SubTasks {
1860 if _, ok := resumed[hash]; !ok {
1861 log.Debug("Aborting suspended storage retrieval", "account", hash)
1862 delete(res.task.SubTasks, hash)
1863 }
1864 }
1865 // If the account range contained no contracts, or all have been fully filled
1866 // beforehand, short circuit storage filling and forward to the next task
1867 if res.task.pend == 0 {
1868 s.forwardAccountTask(res.task)
1869 return
1870 }
1871 // Some accounts are incomplete, leave as is for the storage and contract
1872 // task assigners to pick up and fill.
1873 }
1874
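// The pend counter initialized above is the task's only completion signal:
// each account that still needs bytecode contributes one unit of pending
// work, and each that still needs storage contributes another. The sketch
// below (hypothetical values, not part of the syncer) shows how the counter
// is derived and why forwardAccountTask may only run once it hits zero.

func examplePendBookkeeping() {
	needCode := []bool{true, false, true}  // two accounts miss their bytecode
	needState := []bool{true, true, false} // two accounts miss their storage
	pend := 0
	for i := range needCode {
		if needCode[i] {
			pend++
		}
		if needState[i] {
			pend++
		}
	}
	// pend == 4: four deliveries (bytecode or storage) must arrive and each
	// decrement the counter before the account task can be forwarded.
	fmt.Println("pending sub-fills:", pend)
}

1875 // processBytecodeResponse integrates an already validated bytecode response
1876 // into the account tasks.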
1877 func (s *Syncer) processBytecodeResponse(res *bytecodeResponse) {
1878 batch := s.db.NewBatch()
1879
1880 var (
1881 codes uint64
1882 )
1883 for i, hash := range res.hashes {
1884 code := res.codes[i]
1885
1886 // If the bytecode was not delivered, reschedule it
1887 if code == nil {
1888 res.task.codeTasks[hash] = struct{}{}
1889 continue
1890 }
1891 // Code was delivered, mark it not needed any more
1892 for j, account := range res.task.res.accounts {
1893 if res.task.needCode[j] && hash == common.BytesToHash(account.CodeHash) {
1894 res.task.needCode[j] = false
1895 res.task.pend--
1896 }
1897 }
1898 // Push the bytecode into a database batch
1899 codes++
1900 rawdb.WriteCode(batch, hash, code)
1901 }
1902 bytes := common.StorageSize(batch.ValueSize())
1903 if err := batch.Write(); err != nil {
1904 log.Crit("Failed to persist bytecodes", "err", err)
1905 }
1906 s.bytecodeSynced += codes
1907 s.bytecodeBytes += bytes
1908
1909 log.Debug("Persisted set of bytecodes", "count", codes, "bytes", bytes)
1910
1911 // If this delivery completed the last pending task, forward the account task
1912 // to the next chunk
1913 if res.task.pend == 0 {
1914 s.forwardAccountTask(res.task)
1915 return
1916 }
1917 // Some accounts are still incomplete, leave as is for the storage and contract
1918 // task assigners to pick up and fill.
1919 }
1920
1921 // processStorageResponse integrates an already validated storage response
1922 // into the account tasks.
1923 func (s *Syncer) processStorageResponse(res *storageResponse) {
1924 // Switch the subtask from pending to idle
1925 if res.subTask != nil {
1926 res.subTask.req = nil
1927 }
1928 batch := ethdb.HookedBatch{
1929 Batch: s.db.NewBatch(),
1930 OnPut: func(key []byte, value []byte) {
1931 s.storageBytes += common.StorageSize(len(key) + len(value))
1932 },
1933 }
1934 var (
1935 slots int
1936 oldStorageBytes = s.storageBytes
1937 )
1938 // Iterate over all the accounts and reconstruct their storage tries from the
1939 // delivered slots
1940 for i, account := range res.accounts {
1941 // If the account was not delivered, reschedule it
1942 if i >= len(res.hashes) {
1943 res.mainTask.stateTasks[account] = res.roots[i]
1944 continue
1945 }
1946 // State was delivered, if complete mark as not needed any more, otherwise
1947 // mark the account as needing healing
1948 for j, hash := range res.mainTask.res.hashes {
1949 if account != hash {
1950 continue
1951 }
1952 acc := res.mainTask.res.accounts[j]
1953
1954 // If the packet contains multiple contract storage slots, all
1955 // but the last are surely complete. The last contract may be
1956 // chunked, so check its continuation flag.
1957 if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) {
1958 res.mainTask.needState[j] = false
1959 res.mainTask.pend--
1960 }
1961 // If the last contract was chunked, mark it as needing healing
1962 // to avoid writing it out to disk prematurely.
1963 if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { 1964 res.mainTask.needHeal[j] = true 1965 } 1966 // If the last contract was chunked, we need to switch to large 1967 // contract handling mode 1968 if res.subTask == nil && i == len(res.hashes)-1 && res.cont { 1969 // If we haven't yet started a large-contract retrieval, create 1970 // the subtasks for it within the main account task 1971 if tasks, ok := res.mainTask.SubTasks[account]; !ok { 1972 var ( 1973 keys = res.hashes[i] 1974 chunks = uint64(storageConcurrency) 1975 lastKey common.Hash 1976 ) 1977 if len(keys) > 0 { 1978 lastKey = keys[len(keys)-1] 1979 } 1980 // If the number of slots remaining is low, decrease the 1981 // number of chunks. Somewhere on the order of 10-15K slots 1982 // fit into a packet of 500KB. A key/slot pair is maximum 64 1983 // bytes, so pessimistically maxRequestSize/64 = 8K. 1984 // 1985 // Chunk so that at least 2 packets are needed to fill a task. 1986 if estimate, err := estimateRemainingSlots(len(keys), lastKey); err == nil { 1987 if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks { 1988 chunks = n + 1 1989 } 1990 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "remaining", estimate, "chunks", chunks) 1991 } else { 1992 log.Debug("Chunked large contract", "initiators", len(keys), "tail", lastKey, "chunks", chunks) 1993 } 1994 r := newHashRange(lastKey, chunks) 1995 1996 // Our first task is the one that was just filled by this response. 1997 batch := ethdb.HookedBatch{ 1998 Batch: s.db.NewBatch(), 1999 OnPut: func(key []byte, value []byte) { 2000 s.storageBytes += common.StorageSize(len(key) + len(value)) 2001 }, 2002 } 2003 tasks = append(tasks, &storageTask{ 2004 Next: common.Hash{}, 2005 Last: r.End(), 2006 root: acc.Root, 2007 genBatch: batch, 2008 genTrie: trie.NewStackTrieWithOwner(batch, account), 2009 }) 2010 for r.Next() { 2011 batch := ethdb.HookedBatch{ 2012 Batch: s.db.NewBatch(), 2013 OnPut: func(key []byte, value []byte) { 2014 s.storageBytes += common.StorageSize(len(key) + len(value)) 2015 }, 2016 } 2017 tasks = append(tasks, &storageTask{ 2018 Next: r.Start(), 2019 Last: r.End(), 2020 root: acc.Root, 2021 genBatch: batch, 2022 genTrie: trie.NewStackTrieWithOwner(batch, account), 2023 }) 2024 } 2025 for _, task := range tasks { 2026 log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", task.Next, "last", task.Last) 2027 } 2028 res.mainTask.SubTasks[account] = tasks 2029 2030 // Since we've just created the sub-tasks, this response 2031 // is surely for the first one (zero origin) 2032 res.subTask = tasks[0] 2033 } 2034 } 2035 // If we're in large contract delivery mode, forward the subtask 2036 if res.subTask != nil { 2037 // Ensure the response doesn't overflow into the subsequent task 2038 last := res.subTask.Last.Big() 2039 // Find the first overflowing key. 
While at it, mark res as complete
2040 // if the range includes or passes the 'last'
2041 index := sort.Search(len(res.hashes[i]), func(k int) bool {
2042 cmp := res.hashes[i][k].Big().Cmp(last)
2043 if cmp >= 0 {
2044 res.cont = false
2045 }
2046 return cmp > 0
2047 })
2048 if index >= 0 {
2049 // cut off excess
2050 res.hashes[i] = res.hashes[i][:index]
2051 res.slots[i] = res.slots[i][:index]
2052 }
2053 // Forward the relevant storage chunk (even if created just now)
2054 if res.cont {
2055 res.subTask.Next = incHash(res.hashes[i][len(res.hashes[i])-1])
2056 } else {
2057 res.subTask.done = true
2058 }
2059 }
2060 }
2061 // Iterate over all the complete contracts, reconstruct the trie nodes and
2062 // push them to disk. If the contract is chunked, the trie nodes will be
2063 // reconstructed later.
2064 slots += len(res.hashes[i])
2065
2066 if i < len(res.hashes)-1 || res.subTask == nil {
2067 tr := trie.NewStackTrieWithOwner(batch, account)
2068 for j := 0; j < len(res.hashes[i]); j++ {
2069 tr.Update(res.hashes[i][j][:], res.slots[i][j])
2070 }
2071 tr.Commit()
2072 }
2073 // Persist the received storage segments. This flat state may be
2074 // outdated during the sync, but it can be fixed later during
2075 // snapshot generation.
2076 for j := 0; j < len(res.hashes[i]); j++ {
2077 rawdb.WriteStorageSnapshot(batch, account, res.hashes[i][j], res.slots[i][j])
2078
2079 // If we're storing large contracts, generate the trie nodes
2080 // on the fly to not trash the gluing points
2081 if i == len(res.hashes)-1 && res.subTask != nil {
2082 res.subTask.genTrie.Update(res.hashes[i][j][:], res.slots[i][j])
2083 }
2084 }
2085 }
2086 // Large contracts could have generated new trie nodes, flush them to disk
2087 if res.subTask != nil {
2088 if res.subTask.done {
2089 if root, err := res.subTask.genTrie.Commit(); err != nil {
2090 log.Error("Failed to commit stack slots", "err", err)
2091 } else if root == res.subTask.root {
2092 // If the chunk's root is an overflown but full delivery, clear the heal request
2093 for i, account := range res.mainTask.res.hashes {
2094 if account == res.accounts[len(res.accounts)-1] {
2095 res.mainTask.needHeal[i] = false
2096 }
2097 }
2098 }
2099 }
2100 if res.subTask.genBatch.ValueSize() > ethdb.IdealBatchSize || res.subTask.done {
2101 if err := res.subTask.genBatch.Write(); err != nil {
2102 log.Error("Failed to persist stack slots", "err", err)
2103 }
2104 res.subTask.genBatch.Reset()
2105 }
2106 }
2107 // Flush anything written just now and update the stats
2108 if err := batch.Write(); err != nil {
2109 log.Crit("Failed to persist storage slots", "err", err)
2110 }
2111 s.storageSynced += uint64(slots)
2112
2113 log.Debug("Persisted set of storage slots", "accounts", len(res.hashes), "slots", slots, "bytes", s.storageBytes-oldStorageBytes)
2114
2115 // If this delivery completed the last pending task, forward the account task
2116 // to the next chunk
2117 if res.mainTask.pend == 0 {
2118 s.forwardAccountTask(res.mainTask)
2119 return
2120 }
2121 // Some accounts are still incomplete, leave as is for the storage and contract
2122 // task assigners to pick up and fill.
2123 }
2124
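// The chunk sizing rule used by processStorageResponse above can be read in
// isolation: a response packet holds at most maxRequestSize/64 = 8K key/slot
// pairs, and a chunk should be worth at least two packets, otherwise the
// default 16-way split applies. A small sketch of just that arithmetic (the
// function name and the estimate argument are illustrative only):

func exampleStorageChunking(estimate uint64) uint64 {
	chunks := uint64(storageConcurrency) // default: 16-way split
	if n := estimate / (2 * (maxRequestSize / 64)); n+1 < chunks {
		chunks = n + 1 // few slots remain, fewer chunks suffice
	}
	return chunks
}

// For example, an estimated 20000 remaining slots yields 20000/16384+1 = 2
// chunks rather than 16, keeping each chunk at least two packets deep.

2125 // processTrienodeHealResponse integrates an already validated trienode response
2126 // into the healer tasks.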
2127 func (s *Syncer) processTrienodeHealResponse(res *trienodeHealResponse) {
2128 var (
2129 start = time.Now()
2130 fills int
2131 )
2132 for i, hash := range res.hashes {
2133 node := res.nodes[i]
2134
2135 // If the trie node was not delivered, reschedule it
2136 if node == nil {
2137 res.task.trieTasks[res.paths[i]] = res.hashes[i]
2138 continue
2139 }
2140 fills++
2141
2142 // Push the trie node into the state syncer
2143 s.trienodeHealSynced++
2144 s.trienodeHealBytes += common.StorageSize(len(node))
2145
2146 err := s.healer.scheduler.ProcessNode(trie.NodeSyncResult{Path: res.paths[i], Data: node})
2147 switch err {
2148 case nil:
2149 case trie.ErrAlreadyProcessed:
2150 s.trienodeHealDups++
2151 case trie.ErrNotRequested:
2152 s.trienodeHealNops++
2153 default:
2154 log.Error("Invalid trienode processed", "hash", hash, "err", err)
2155 }
2156 }
2157 batch := s.db.NewBatch()
2158 if err := s.healer.scheduler.Commit(batch); err != nil {
2159 log.Error("Failed to commit healing data", "err", err)
2160 }
2161 if err := batch.Write(); err != nil {
2162 log.Crit("Failed to persist healing data", "err", err)
2163 }
2164 log.Debug("Persisted set of healing data", "type", "trienodes", "bytes", common.StorageSize(batch.ValueSize()))
2165
2166 // Calculate the processing rate of one filled trie node
2167 rate := float64(fills) / (float64(time.Since(start)) / float64(time.Second))
2168
2169 // Update the currently measured trienode queueing and processing throughput.
2170 //
2171 // The processing rate needs to be updated uniformly, independent of whether
2172 // we've processed 1x100 trie nodes or 100x1, to keep the rate consistent even
2173 // in the face of varying network packets. As such, we cannot just measure the
2174 // time it took to process N trie nodes and update once, we need one update
2175 // per trie node.
2176 //
2177 // Naively, that would be:
2178 //
2179 // for i:=0; i<fills; i++ {
2180 // healRate = (1-measurementImpact)*oldRate + measurementImpact*newRate
2181 // }
2182 //
2183 // Essentially, a recursive expansion of HR = (1-MI)*HR + MI*NR.
2184 //
2185 // We can expand that formula for the Nth item as:
2186 // HR(N) = (1-MI)^N*OR + (1-MI)^(N-1)*MI*NR + (1-MI)^(N-2)*MI*NR + ... + (1-MI)^0*MI*NR
2187 //
2188 // The above is a geometric sequence that can be summed to:
2189 // HR(N) = (1-MI)^N*(OR-NR) + NR
2190 s.trienodeHealRate = gomath.Pow(1-trienodeHealRateMeasurementImpact, float64(fills))*(s.trienodeHealRate-rate) + rate
2191
2192 pending := atomic.LoadUint64(&s.trienodeHealPend)
2193 if time.Since(s.trienodeHealThrottled) > time.Second {
2194 // Periodically adjust the trie node throttler
2195 if float64(pending) > 2*s.trienodeHealRate {
2196 s.trienodeHealThrottle *= trienodeHealThrottleIncrease
2197 } else {
2198 s.trienodeHealThrottle /= trienodeHealThrottleDecrease
2199 }
2200 if s.trienodeHealThrottle > maxTrienodeHealThrottle {
2201 s.trienodeHealThrottle = maxTrienodeHealThrottle
2202 } else if s.trienodeHealThrottle < minTrienodeHealThrottle {
2203 s.trienodeHealThrottle = minTrienodeHealThrottle
2204 }
2205 s.trienodeHealThrottled = time.Now()
2206
2207 log.Debug("Updated trie node heal throttler", "rate", s.trienodeHealRate, "pending", pending, "throttle", s.trienodeHealThrottle)
2208 }
2209 }
2210
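// The closed-form update above can be sanity checked against the naive loop
// it replaces: both compute the same N-fold EWMA, the closed form just spares
// one update per processed trie node. A small illustrative sketch (the
// function name is hypothetical, not used by the syncer):

func exampleHealRateClosedForm(oldRate, newRate float64, fills int) (loop, closed float64) {
	const mi = trienodeHealRateMeasurementImpact

	loop = oldRate
	for i := 0; i < fills; i++ {
		loop = (1-mi)*loop + mi*newRate // HR = (1-MI)*HR + MI*NR, applied N times
	}
	closed = gomath.Pow(1-mi, float64(fills))*(oldRate-newRate) + newRate
	return loop, closed // equal up to floating point rounding
}

2211 // processBytecodeHealResponse integrates an already validated bytecode response
2212 // into the healer tasks.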
2213 func (s *Syncer) processBytecodeHealResponse(res *bytecodeHealResponse) {
2214 for i, hash := range res.hashes {
2215 node := res.codes[i]
2216
2217 // If the trie node was not delivered, reschedule it
2218 if node == nil {
2219 res.task.codeTasks[hash] = struct{}{}
2220 continue
2221 }
2222 // Push the trie node into the state syncer
2223 s.bytecodeHealSynced++
2224 s.bytecodeHealBytes += common.StorageSize(len(node))
2225
2226 err := s.healer.scheduler.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: node})
2227 switch err {
2228 case nil:
2229 case trie.ErrAlreadyProcessed:
2230 s.bytecodeHealDups++
2231 case trie.ErrNotRequested:
2232 s.bytecodeHealNops++
2233 default:
2234 log.Error("Invalid bytecode processed", "hash", hash, "err", err)
2235 }
2236 }
2237 batch := s.db.NewBatch()
2238 if err := s.healer.scheduler.Commit(batch); err != nil {
2239 log.Error("Failed to commit healing data", "err", err)
2240 }
2241 if err := batch.Write(); err != nil {
2242 log.Crit("Failed to persist healing data", "err", err)
2243 }
2244 log.Debug("Persisted set of healing data", "type", "bytecode", "bytes", common.StorageSize(batch.ValueSize()))
2245 }
2246
2247 // forwardAccountTask takes a filled account task and persists anything available
2248 // into the database, after which it forwards the next account marker so that the
2249 // task's next chunk may be filled.
2250 func (s *Syncer) forwardAccountTask(task *accountTask) {
2251 // Remove any pending delivery
2252 res := task.res
2253 if res == nil {
2254 return // nothing to forward
2255 }
2256 task.res = nil
2257
2258 // Persist the received account segments. This flat state may be
2259 // outdated during the sync, but it can be fixed later during
2260 // snapshot generation.
2261 oldAccountBytes := s.accountBytes
2262
2263 batch := ethdb.HookedBatch{
2264 Batch: s.db.NewBatch(),
2265 OnPut: func(key []byte, value []byte) {
2266 s.accountBytes += common.StorageSize(len(key) + len(value))
2267 },
2268 }
2269 for i, hash := range res.hashes {
2270 if task.needCode[i] || task.needState[i] {
2271 break
2272 }
2273 slim := snapshot.SlimAccountRLP(res.accounts[i].Nonce, res.accounts[i].Balance, res.accounts[i].Root, res.accounts[i].CodeHash)
2274 rawdb.WriteAccountSnapshot(batch, hash, slim)
2275
2276 // If the task is complete, drop it into the stack trie to generate
2277 // account trie nodes for it
2278 if !task.needHeal[i] {
2279 full, err := snapshot.FullAccountRLP(slim) // TODO(karalabe): Slim parsing can be omitted
2280 if err != nil {
2281 panic(err) // Really shouldn't ever happen
2282 }
2283 task.genTrie.Update(hash[:], full)
2284 }
2285 }
2286 // Flush anything written just now and update the stats
2287 if err := batch.Write(); err != nil {
2288 log.Crit("Failed to persist accounts", "err", err)
2289 }
2290 s.accountSynced += uint64(len(res.accounts))
2291
2292 // Task filling persisted, push the chunk marker forward to the first
2293 // account still missing data.
2294 for i, hash := range res.hashes {
2295 if task.needCode[i] || task.needState[i] {
2296 return
2297 }
2298 task.Next = incHash(hash)
2299 }
2300 // All accounts marked as complete, track if the entire task is done
2301 task.done = !res.cont
2302
2303 // Stack trie could have generated trie nodes, push them to disk (we need to
2304 // flush after finalizing task.done). It's fine even if we crash and lose this
2305 // write as it will only cause more data to be downloaded during heal.
2306 if task.done {
2307 if _, err := task.genTrie.Commit(); err != nil {
2308 log.Error("Failed to commit stack account", "err", err)
2309 }
2310 }
2311 if task.genBatch.ValueSize() > ethdb.IdealBatchSize || task.done {
2312 if err := task.genBatch.Write(); err != nil {
2313 log.Error("Failed to persist stack account", "err", err)
2314 }
2315 task.genBatch.Reset()
2316 }
2317 log.Debug("Persisted range of accounts", "accounts", len(res.accounts), "bytes", s.accountBytes-oldAccountBytes)
2318 }
2319
2320 // OnAccounts is a callback method to invoke when a range of accounts is
2321 // received from a remote peer.
2322 func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error {
2323 size := common.StorageSize(len(hashes) * common.HashLength)
2324 for _, account := range accounts {
2325 size += common.StorageSize(len(account))
2326 }
2327 for _, node := range proof {
2328 size += common.StorageSize(len(node))
2329 }
2330 logger := peer.Log().New("reqid", id)
2331 logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size)
2332
2333 // Whether or not the response is valid, we can mark the peer as idle and
2334 // notify the scheduler to assign a new task. If the response is invalid,
2335 // we'll drop the peer in a bit.
2336 defer func() {
2337 s.lock.Lock()
2338 defer s.lock.Unlock()
2339 if _, ok := s.peers[peer.ID()]; ok {
2340 s.accountIdlers[peer.ID()] = struct{}{}
2341 }
2342 select {
2343 case s.update <- struct{}{}:
2344 default:
2345 }
2346 }()
2347 s.lock.Lock()
2348 // Ensure the response is for a valid request
2349 req, ok := s.accountReqs[id]
2350 if !ok {
2351 // Request stale, perhaps the peer timed out but came through in the end
2352 logger.Warn("Unexpected account range packet")
2353 s.lock.Unlock()
2354 return nil
2355 }
2356 delete(s.accountReqs, id)
2357 s.rates.Update(peer.ID(), AccountRangeMsg, time.Since(req.time), int(size))
2358
2359 // Clean up the request timeout timer, we'll see how to proceed further based
2360 // on the actual delivered content
2361 if !req.timeout.Stop() {
2362 // The timeout is already triggered, and this request will be reverted+rescheduled
2363 s.lock.Unlock()
2364 return nil
2365 }
2366 // Response is valid, but check if peer is signalling that it does not have
2367 // the requested data. For account range queries that means the state being
2368 // retrieved was either already pruned remotely, or the peer is not yet
2369 // synced to our head.
2370 if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 {
2371 logger.Debug("Peer rejected account range request", "root", s.root)
2372 s.statelessPeers[peer.ID()] = struct{}{}
2373 s.lock.Unlock()
2374
2375 // Signal this request as failed, and ready for rescheduling
2376 s.scheduleRevertAccountRequest(req)
2377 return nil
2378 }
2379 root := s.root
2380 s.lock.Unlock()
2381
2382 // Reconstruct a partial trie from the response and verify it
2383 keys := make([][]byte, len(hashes))
2384 for i, key := range hashes {
2385 keys[i] = common.CopyBytes(key[:])
2386 }
2387 nodes := make(light.NodeList, len(proof))
2388 for i, node := range proof {
2389 nodes[i] = node
2390 }
2391 proofdb := nodes.NodeSet()
2392
2393 var end []byte
2394 if len(keys) > 0 {
2395 end = keys[len(keys)-1]
2396 }
2397 cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb)
2398 if err != nil {
2399 logger.Warn("Account range failed proof", "err", err)
2400 // Signal this request as failed, and ready for rescheduling
2401 s.scheduleRevertAccountRequest(req)
2402 return err
2403 }
2404 accs := make([]*types.StateAccount, len(accounts))
2405 for i, account := range accounts {
2406 acc := new(types.StateAccount)
2407 if err := rlp.DecodeBytes(account, acc); err != nil {
2408 panic(err) // We created these blobs, we must be able to decode them
2409 }
2410 accs[i] = acc
2411 }
2412 response := &accountResponse{
2413 task: req.task,
2414 hashes: hashes,
2415 accounts: accs,
2416 cont: cont,
2417 }
2418 select {
2419 case req.deliver <- response:
2420 case <-req.cancel:
2421 case <-req.stale:
2422 }
2423 return nil
2424 }
2425
2426 // OnByteCodes is a callback method to invoke when a batch of contract
2427 // bytecodes are received from a remote peer.
2428 func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
2429 s.lock.RLock()
2430 syncing := !s.snapped
2431 s.lock.RUnlock()
2432
2433 if syncing {
2434 return s.onByteCodes(peer, id, bytecodes)
2435 }
2436 return s.onHealByteCodes(peer, id, bytecodes)
2437 }
2438
2439 // onByteCodes is a callback method to invoke when a batch of contract
2440 // bytecodes are received from a remote peer in the syncing phase.
2441 func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
2442 var size common.StorageSize
2443 for _, code := range bytecodes {
2444 size += common.StorageSize(len(code))
2445 }
2446 logger := peer.Log().New("reqid", id)
2447 logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size)
2448
2449 // Whether or not the response is valid, we can mark the peer as idle and
2450 // notify the scheduler to assign a new task. If the response is invalid,
2451 // we'll drop the peer in a bit.
2452 defer func() {
2453 s.lock.Lock()
2454 defer s.lock.Unlock()
2455 if _, ok := s.peers[peer.ID()]; ok {
2456 s.bytecodeIdlers[peer.ID()] = struct{}{}
2457 }
2458 select {
2459 case s.update <- struct{}{}:
2460 default:
2461 }
2462 }()
2463 s.lock.Lock()
2464 // Ensure the response is for a valid request
2465 req, ok := s.bytecodeReqs[id]
2466 if !ok {
2467 // Request stale, perhaps the peer timed out but came through in the end
2468 logger.Warn("Unexpected bytecode packet")
2469 s.lock.Unlock()
2470 return nil
2471 }
2472 delete(s.bytecodeReqs, id)
2473 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
2474
2475 // Clean up the request timeout timer, we'll see how to proceed further based
2476 // on the actual delivered content
2477 if !req.timeout.Stop() {
2478 // The timeout is already triggered, and this request will be reverted+rescheduled
2479 s.lock.Unlock()
2480 return nil
2481 }
2482
2483 // Response is valid, but check if peer is signalling that it does not have
2484 // the requested data. For bytecode range queries that means the peer is not
2485 // yet synced.
2486 if len(bytecodes) == 0 {
2487 logger.Debug("Peer rejected bytecode request")
2488 s.statelessPeers[peer.ID()] = struct{}{}
2489 s.lock.Unlock()
2490
2491 // Signal this request as failed, and ready for rescheduling
2492 s.scheduleRevertBytecodeRequest(req)
2493 return nil
2494 }
2495 s.lock.Unlock()
2496
2497 // Cross reference the requested bytecodes with the response to find gaps
2498 // that the serving node is missing
2499 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
2500 hash := make([]byte, 32)
2501
2502 codes := make([][]byte, len(req.hashes))
2503 for i, j := 0, 0; i < len(bytecodes); i++ {
2504 // Find the next hash that we've been served, leaving misses with nils
2505 hasher.Reset()
2506 hasher.Write(bytecodes[i])
2507 hasher.Read(hash)
2508
2509 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
2510 j++
2511 }
2512 if j < len(req.hashes) {
2513 codes[j] = bytecodes[i]
2514 j++
2515 continue
2516 }
2517 // We've either run out of hashes, or got unrequested data
2518 logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i)
2519 // Signal this request as failed, and ready for rescheduling
2520 s.scheduleRevertBytecodeRequest(req)
2521 return errors.New("unexpected bytecode")
2522 }
2523 // Response validated, send it to the scheduler for filling
2524 response := &bytecodeResponse{
2525 task: req.task,
2526 hashes: req.hashes,
2527 codes: codes,
2528 }
2529 select {
2530 case req.deliver <- response:
2531 case <-req.cancel:
2532 case <-req.stale:
2533 }
2534 return nil
2535 }
2536
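// The cross-referencing loop above (repeated for trienodes and healing
// bytecodes further down) relies on one protocol guarantee: peers may omit
// items, but must preserve the order of the request. A single forward pass
// over both slices therefore suffices; misses stay nil for rescheduling and
// any unrequested or out-of-order item aborts the delivery. A self-contained
// sketch of just that matching step (hypothetical name, illustrative only):

func exampleMatchDeliveries(requested []common.Hash, delivered [][]byte) ([][]byte, error) {
	codes := make([][]byte, len(requested))
	for i, j := 0, 0; i < len(delivered); i++ {
		hash := crypto.Keccak256Hash(delivered[i])

		for j < len(requested) && hash != requested[j] {
			j++ // skip requested items the peer chose not to serve
		}
		if j >= len(requested) {
			return nil, errors.New("unexpected delivery")
		}
		codes[j] = delivered[i]
		j++
	}
	return codes, nil
}

2537 // OnStorage is a callback method to invoke when ranges of storage slots
2538 // are received from a remote peer.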
2539 func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { 2540 // Gather some trace stats to aid in debugging issues 2541 var ( 2542 hashCount int 2543 slotCount int 2544 size common.StorageSize 2545 ) 2546 for _, hashset := range hashes { 2547 size += common.StorageSize(common.HashLength * len(hashset)) 2548 hashCount += len(hashset) 2549 } 2550 for _, slotset := range slots { 2551 for _, slot := range slotset { 2552 size += common.StorageSize(len(slot)) 2553 } 2554 slotCount += len(slotset) 2555 } 2556 for _, node := range proof { 2557 size += common.StorageSize(len(node)) 2558 } 2559 logger := peer.Log().New("reqid", id) 2560 logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) 2561 2562 // Whether or not the response is valid, we can mark the peer as idle and 2563 // notify the scheduler to assign a new task. If the response is invalid, 2564 // we'll drop the peer in a bit. 2565 defer func() { 2566 s.lock.Lock() 2567 defer s.lock.Unlock() 2568 if _, ok := s.peers[peer.ID()]; ok { 2569 s.storageIdlers[peer.ID()] = struct{}{} 2570 } 2571 select { 2572 case s.update <- struct{}{}: 2573 default: 2574 } 2575 }() 2576 s.lock.Lock() 2577 // Ensure the response is for a valid request 2578 req, ok := s.storageReqs[id] 2579 if !ok { 2580 // Request stale, perhaps the peer timed out but came through in the end 2581 logger.Warn("Unexpected storage ranges packet") 2582 s.lock.Unlock() 2583 return nil 2584 } 2585 delete(s.storageReqs, id) 2586 s.rates.Update(peer.ID(), StorageRangesMsg, time.Since(req.time), int(size)) 2587 2588 // Clean up the request timeout timer, we'll see how to proceed further based 2589 // on the actual delivered content 2590 if !req.timeout.Stop() { 2591 // The timeout is already triggered, and this request will be reverted+rescheduled 2592 s.lock.Unlock() 2593 return nil 2594 } 2595 2596 // Reject the response if the hash sets and slot sets don't match, or if the 2597 // peer sent more data than requested. 2598 if len(hashes) != len(slots) { 2599 s.lock.Unlock() 2600 s.scheduleRevertStorageRequest(req) // reschedule request 2601 logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) 2602 return errors.New("hash and slot set size mismatch") 2603 } 2604 if len(hashes) > len(req.accounts) { 2605 s.lock.Unlock() 2606 s.scheduleRevertStorageRequest(req) // reschedule request 2607 logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) 2608 return errors.New("hash set larger than requested") 2609 } 2610 // Response is valid, but check if peer is signalling that it does not have 2611 // the requested data. For storage range queries that means the state being 2612 // retrieved was either already pruned remotely, or the peer is not yet 2613 // synced to our head. 
2614 if len(hashes) == 0 { 2615 logger.Debug("Peer rejected storage request") 2616 s.statelessPeers[peer.ID()] = struct{}{} 2617 s.lock.Unlock() 2618 s.scheduleRevertStorageRequest(req) // reschedule request 2619 return nil 2620 } 2621 s.lock.Unlock() 2622 2623 // Reconstruct the partial tries from the response and verify them 2624 var cont bool 2625 2626 for i := 0; i < len(hashes); i++ { 2627 // Convert the keys and proofs into an internal format 2628 keys := make([][]byte, len(hashes[i])) 2629 for j, key := range hashes[i] { 2630 keys[j] = common.CopyBytes(key[:]) 2631 } 2632 nodes := make(light.NodeList, 0, len(proof)) 2633 if i == len(hashes)-1 { 2634 for _, node := range proof { 2635 nodes = append(nodes, node) 2636 } 2637 } 2638 var err error 2639 if len(nodes) == 0 { 2640 // No proof has been attached, the response must cover the entire key 2641 // space and hash to the origin root. 2642 _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) 2643 if err != nil { 2644 s.scheduleRevertStorageRequest(req) // reschedule request 2645 logger.Warn("Storage slots failed proof", "err", err) 2646 return err 2647 } 2648 } else { 2649 // A proof was attached, the response is only partial, check that the 2650 // returned data is indeed part of the storage trie 2651 proofdb := nodes.NodeSet() 2652 2653 var end []byte 2654 if len(keys) > 0 { 2655 end = keys[len(keys)-1] 2656 } 2657 cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) 2658 if err != nil { 2659 s.scheduleRevertStorageRequest(req) // reschedule request 2660 logger.Warn("Storage range failed proof", "err", err) 2661 return err 2662 } 2663 } 2664 } 2665 // Partial tries reconstructed, send them to the scheduler for storage filling 2666 response := &storageResponse{ 2667 mainTask: req.mainTask, 2668 subTask: req.subTask, 2669 accounts: req.accounts, 2670 roots: req.roots, 2671 hashes: hashes, 2672 slots: slots, 2673 cont: cont, 2674 } 2675 select { 2676 case req.deliver <- response: 2677 case <-req.cancel: 2678 case <-req.stale: 2679 } 2680 return nil 2681 } 2682 2683 // OnTrieNodes is a callback method to invoke when a batch of trie nodes 2684 // are received from a remote peer. 2685 func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { 2686 var size common.StorageSize 2687 for _, node := range trienodes { 2688 size += common.StorageSize(len(node)) 2689 } 2690 logger := peer.Log().New("reqid", id) 2691 logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) 2692 2693 // Whether or not the response is valid, we can mark the peer as idle and 2694 // notify the scheduler to assign a new task. If the response is invalid, 2695 // we'll drop the peer in a bit. 
2696 defer func() {
2697 s.lock.Lock()
2698 defer s.lock.Unlock()
2699 if _, ok := s.peers[peer.ID()]; ok {
2700 s.trienodeHealIdlers[peer.ID()] = struct{}{}
2701 }
2702 select {
2703 case s.update <- struct{}{}:
2704 default:
2705 }
2706 }()
2707 s.lock.Lock()
2708 // Ensure the response is for a valid request
2709 req, ok := s.trienodeHealReqs[id]
2710 if !ok {
2711 // Request stale, perhaps the peer timed out but came through in the end
2712 logger.Warn("Unexpected trienode heal packet")
2713 s.lock.Unlock()
2714 return nil
2715 }
2716 delete(s.trienodeHealReqs, id)
2717 s.rates.Update(peer.ID(), TrieNodesMsg, time.Since(req.time), len(trienodes))
2718
2719 // Clean up the request timeout timer, we'll see how to proceed further based
2720 // on the actual delivered content
2721 if !req.timeout.Stop() {
2722 // The timeout is already triggered, and this request will be reverted+rescheduled
2723 s.lock.Unlock()
2724 return nil
2725 }
2726
2727 // Response is valid, but check if peer is signalling that it does not have
2728 // the requested data. For trienode heal queries that means the peer is not
2729 // yet synced.
2730 if len(trienodes) == 0 {
2731 logger.Debug("Peer rejected trienode heal request")
2732 s.statelessPeers[peer.ID()] = struct{}{}
2733 s.lock.Unlock()
2734
2735 // Signal this request as failed, and ready for rescheduling
2736 s.scheduleRevertTrienodeHealRequest(req)
2737 return nil
2738 }
2739 s.lock.Unlock()
2740
2741 // Cross reference the requested trienodes with the response to find gaps
2742 // that the serving node is missing
2743 var (
2744 hasher = sha3.NewLegacyKeccak256().(crypto.KeccakState)
2745 hash = make([]byte, 32)
2746 nodes = make([][]byte, len(req.hashes))
2747 fills uint64
2748 )
2749 for i, j := 0, 0; i < len(trienodes); i++ {
2750 // Find the next hash that we've been served, leaving misses with nils
2751 hasher.Reset()
2752 hasher.Write(trienodes[i])
2753 hasher.Read(hash)
2754
2755 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
2756 j++
2757 }
2758 if j < len(req.hashes) {
2759 nodes[j] = trienodes[i]
2760 fills++
2761 j++
2762 continue
2763 }
2764 // We've either run out of hashes, or got unrequested data
2765 logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i)
2766
2767 // Signal this request as failed, and ready for rescheduling
2768 s.scheduleRevertTrienodeHealRequest(req)
2769 return errors.New("unexpected healing trienode")
2770 }
2771 // Response validated, send it to the scheduler for filling
2772 atomic.AddUint64(&s.trienodeHealPend, fills)
2773 defer func() {
2774 atomic.AddUint64(&s.trienodeHealPend, ^(fills - 1))
2775 }()
2776 response := &trienodeHealResponse{
2777 paths: req.paths,
2778 task: req.task,
2779 hashes: req.hashes,
2780 nodes: nodes,
2781 }
2782 select {
2783 case req.deliver <- response:
2784 case <-req.cancel:
2785 case <-req.stale:
2786 }
2787 return nil
2788 }
2789
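// The deferred AddUint64 above is a subtraction in disguise: sync/atomic has
// no SubUint64, but in two's complement arithmetic adding ^(x-1) equals
// subtracting x, which is the substitution the atomic package documentation
// itself recommends. A tiny demonstration (illustrative only):

func exampleAtomicSub() {
	var pending uint64
	atomic.AddUint64(&pending, 5)            // five fills in flight
	atomic.AddUint64(&pending, ^uint64(5-1)) // same as pending -= 5
	fmt.Println(atomic.LoadUint64(&pending)) // prints 0
}

2790 // onHealByteCodes is a callback method to invoke when a batch of contract
2791 // bytecodes are received from a remote peer in the healing phase.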
2792 func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error {
2793 var size common.StorageSize
2794 for _, code := range bytecodes {
2795 size += common.StorageSize(len(code))
2796 }
2797 logger := peer.Log().New("reqid", id)
2798 logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size)
2799
2800 // Whether or not the response is valid, we can mark the peer as idle and
2801 // notify the scheduler to assign a new task. If the response is invalid,
2802 // we'll drop the peer in a bit.
2803 defer func() {
2804 s.lock.Lock()
2805 defer s.lock.Unlock()
2806 if _, ok := s.peers[peer.ID()]; ok {
2807 s.bytecodeHealIdlers[peer.ID()] = struct{}{}
2808 }
2809 select {
2810 case s.update <- struct{}{}:
2811 default:
2812 }
2813 }()
2814 s.lock.Lock()
2815 // Ensure the response is for a valid request
2816 req, ok := s.bytecodeHealReqs[id]
2817 if !ok {
2818 // Request stale, perhaps the peer timed out but came through in the end
2819 logger.Warn("Unexpected bytecode heal packet")
2820 s.lock.Unlock()
2821 return nil
2822 }
2823 delete(s.bytecodeHealReqs, id)
2824 s.rates.Update(peer.ID(), ByteCodesMsg, time.Since(req.time), len(bytecodes))
2825
2826 // Clean up the request timeout timer, we'll see how to proceed further based
2827 // on the actual delivered content
2828 if !req.timeout.Stop() {
2829 // The timeout is already triggered, and this request will be reverted+rescheduled
2830 s.lock.Unlock()
2831 return nil
2832 }
2833
2834 // Response is valid, but check if peer is signalling that it does not have
2835 // the requested data. For bytecode range queries that means the peer is not
2836 // yet synced.
2837 if len(bytecodes) == 0 {
2838 logger.Debug("Peer rejected bytecode heal request")
2839 s.statelessPeers[peer.ID()] = struct{}{}
2840 s.lock.Unlock()
2841
2842 // Signal this request as failed, and ready for rescheduling
2843 s.scheduleRevertBytecodeHealRequest(req)
2844 return nil
2845 }
2846 s.lock.Unlock()
2847
2848 // Cross reference the requested bytecodes with the response to find gaps
2849 // that the serving node is missing
2850 hasher := sha3.NewLegacyKeccak256().(crypto.KeccakState)
2851 hash := make([]byte, 32)
2852
2853 codes := make([][]byte, len(req.hashes))
2854 for i, j := 0, 0; i < len(bytecodes); i++ {
2855 // Find the next hash that we've been served, leaving misses with nils
2856 hasher.Reset()
2857 hasher.Write(bytecodes[i])
2858 hasher.Read(hash)
2859
2860 for j < len(req.hashes) && !bytes.Equal(hash, req.hashes[j][:]) {
2861 j++
2862 }
2863 if j < len(req.hashes) {
2864 codes[j] = bytecodes[i]
2865 j++
2866 continue
2867 }
2868 // We've either run out of hashes, or got unrequested data
2869 logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i)
2870 // Signal this request as failed, and ready for rescheduling
2871 s.scheduleRevertBytecodeHealRequest(req)
2872 return errors.New("unexpected healing bytecode")
2873 }
2874 // Response validated, send it to the scheduler for filling
2875 response := &bytecodeHealResponse{
2876 task: req.task,
2877 hashes: req.hashes,
2878 codes: codes,
2879 }
2880 select {
2881 case req.deliver <- response:
2882 case <-req.cancel:
2883 case <-req.stale:
2884 }
2885 return nil
2886 }
2887
2888 // onHealState is a callback method to invoke when a flat state (account
2889 // or storage slot) is downloaded during the healing stage. The flat states
2890 // can be persisted blindly and can be fixed later in the generation stage.
2891 // Note it's not concurrency safe; concurrent access must be handled by the caller.
2892 func (s *Syncer) onHealState(paths [][]byte, value []byte) error {
2893 if len(paths) == 1 {
2894 var account types.StateAccount
2895 if err := rlp.DecodeBytes(value, &account); err != nil {
2896 return nil // Returning the error here would drop the remote peer
2897 }
2898 blob := snapshot.SlimAccountRLP(account.Nonce, account.Balance, account.Root, account.CodeHash)
2899 rawdb.WriteAccountSnapshot(s.stateWriter, common.BytesToHash(paths[0]), blob)
2900 s.accountHealed += 1
2901 s.accountHealedBytes += common.StorageSize(1 + common.HashLength + len(blob))
2902 }
2903 if len(paths) == 2 {
2904 rawdb.WriteStorageSnapshot(s.stateWriter, common.BytesToHash(paths[0]), common.BytesToHash(paths[1]), value)
2905 s.storageHealed += 1
2906 s.storageHealedBytes += common.StorageSize(1 + 2*common.HashLength + len(value))
2907 }
2908 if s.stateWriter.ValueSize() > ethdb.IdealBatchSize {
2909 s.stateWriter.Write() // It's fine to ignore the error here
2910 s.stateWriter.Reset()
2911 }
2912 return nil
2913 }
2914
2915 // hashSpace is the total size of the 256 bit hash space for accounts.
2916 var hashSpace = new(big.Int).Exp(common.Big2, common.Big256, nil)
2917
2918 // report calculates various status reports and provides them to the user.
2919 func (s *Syncer) report(force bool) {
2920 if len(s.tasks) > 0 {
2921 s.reportSyncProgress(force)
2922 return
2923 }
2924 s.reportHealProgress(force)
2925 }
2926
2927 // reportSyncProgress calculates various status reports and provides them to the user.
2928 func (s *Syncer) reportSyncProgress(force bool) {
2929 // Don't report all the events, just occasionally
2930 if !force && time.Since(s.logTime) < 8*time.Second {
2931 return
2932 }
2933 // Don't report anything until we have meaningful progress
2934 synced := s.accountBytes + s.bytecodeBytes + s.storageBytes
2935 if synced == 0 {
2936 return
2937 }
2938 accountGaps := new(big.Int)
2939 for _, task := range s.tasks {
2940 accountGaps.Add(accountGaps, new(big.Int).Sub(task.Last.Big(), task.Next.Big()))
2941 }
2942 accountFills := new(big.Int).Sub(hashSpace, accountGaps)
2943 if accountFills.BitLen() == 0 {
2944 return
2945 }
2946 s.logTime = time.Now()
2947 estBytes := float64(new(big.Int).Div(
2948 new(big.Int).Mul(new(big.Int).SetUint64(uint64(synced)), hashSpace),
2949 accountFills,
2950 ).Uint64())
2951 // Don't report anything until we have meaningful progress
2952 if estBytes < 1.0 {
2953 return
2954 }
2955 elapsed := time.Since(s.startTime)
2956 estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
2957
2958 // Create a mega progress report
2959 var (
2960 progress = fmt.Sprintf("%.2f%%", float64(synced)*100/estBytes)
2961 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountSynced), s.accountBytes.TerminalString())
2962 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageSynced), s.storageBytes.TerminalString())
2963 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeSynced), s.bytecodeBytes.TerminalString())
2964 )
2965 log.Info("State sync in progress", "synced", progress, "state", synced,
2966 "accounts", accounts, "slots", storage, "codes", bytecode, "eta", common.PrettyDuration(estTime-elapsed))
2967 }
2968
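// The progress estimate above extrapolates proportionally: if the bytes
// synced so far correspond to accountFills out of the full 2^256 hash space,
// the total state size and the remaining time follow from simple ratios.
// A sketch of just that arithmetic (hypothetical function, assumes a
// non-zero synced count and accountFills):

func exampleSyncETA(synced uint64, accountFills *big.Int, elapsed time.Duration) time.Duration {
	estBytes := new(big.Int).Div(
		new(big.Int).Mul(new(big.Int).SetUint64(synced), hashSpace),
		accountFills,
	).Uint64()
	estTime := elapsed / time.Duration(synced) * time.Duration(estBytes)
	return estTime - elapsed // remaining time, i.e. the reported ETA
}

2969 // reportHealProgress calculates various status reports and provides them to the user.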
2970 func (s *Syncer) reportHealProgress(force bool) {
2971 // Don't report all the events, just occasionally
2972 if !force && time.Since(s.logTime) < 8*time.Second {
2973 return
2974 }
2975 s.logTime = time.Now()
2976
2977 // Create a mega progress report
2978 var (
2979 trienode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.trienodeHealSynced), s.trienodeHealBytes.TerminalString())
2980 bytecode = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.bytecodeHealSynced), s.bytecodeHealBytes.TerminalString())
2981 accounts = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.accountHealed), s.accountHealedBytes.TerminalString())
2982 storage = fmt.Sprintf("%v@%v", log.FormatLogfmtUint64(s.storageHealed), s.storageHealedBytes.TerminalString())
2983 )
2984 log.Info("State heal in progress", "accounts", accounts, "slots", storage,
2985 "codes", bytecode, "nodes", trienode, "pending", s.healer.scheduler.Pending())
2986 }
2987
2988 // estimateRemainingSlots tries to determine roughly how many slots are left in
2989 // a contract storage, based on the number of keys and the last hash. This method
2990 // assumes that the hashes are lexicographically ordered and evenly distributed.
2991 func estimateRemainingSlots(hashes int, last common.Hash) (uint64, error) {
2992 if last == (common.Hash{}) {
2993 return 0, errors.New("last hash empty")
2994 }
2995 space := new(big.Int).Mul(math.MaxBig256, big.NewInt(int64(hashes)))
2996 space.Div(space, last.Big())
2997 if !space.IsUint64() {
2998 // Gigantic address space probably due to too few or malicious slots
2999 return 0, errors.New("too few slots for estimation")
3000 }
3001 return space.Uint64() - uint64(hashes), nil
3002 }
3003
3004 // capacitySort implements the Sort interface, allowing sorting by peer message
3005 // throughput. Note, callers should use sort.Reverse to get the desired effect
3006 // of highest capacity being at the front.
3007 type capacitySort struct {
3008 ids []string
3009 caps []int
3010 }
3011
3012 func (s *capacitySort) Len() int {
3013 return len(s.ids)
3014 }
3015
3016 func (s *capacitySort) Less(i, j int) bool {
3017 return s.caps[i] < s.caps[j]
3018 }
3019
3020 func (s *capacitySort) Swap(i, j int) {
3021 s.ids[i], s.ids[j] = s.ids[j], s.ids[i]
3022 s.caps[i], s.caps[j] = s.caps[j], s.caps[i]
3023 }
3024
3025 // healRequestSort implements the Sort interface, allowing sorting trienode
3026 // heal requests, which is a prerequisite for merging storage requests.
3027 type healRequestSort struct {
3028 paths []string
3029 hashes []common.Hash
3030 syncPaths []trie.SyncPath
3031 }
3032
3033 func (t *healRequestSort) Len() int {
3034 return len(t.hashes)
3035 }
3036
3037 func (t *healRequestSort) Less(i, j int) bool {
3038 a := t.syncPaths[i]
3039 b := t.syncPaths[j]
3040 switch bytes.Compare(a[0], b[0]) {
3041 case -1:
3042 return true
3043 case 1:
3044 return false
3045 }
3046 // identical first part
3047 if len(a) < len(b) {
3048 return true
3049 }
3050 if len(b) < len(a) {
3051 return false
3052 }
3053 if len(a) == 2 {
3054 return bytes.Compare(a[1], b[1]) < 0
3055 }
3056 return false
3057 }
3058
3059 func (t *healRequestSort) Swap(i, j int) {
3060 t.paths[i], t.paths[j] = t.paths[j], t.paths[i]
3061 t.hashes[i], t.hashes[j] = t.hashes[j], t.hashes[i]
3062 t.syncPaths[i], t.syncPaths[j] = t.syncPaths[j], t.syncPaths[i]
3063 }
3064
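// A worked instance of estimateRemainingSlots with hypothetical numbers: if
// 4096 evenly distributed keys have been received and the last one sits at
// roughly 1/16th of the hash space, the whole trie extrapolates to about
// 4096*16 = 65536 slots, so on the order of 61440 should remain.

func exampleRemainingSlots() (uint64, error) {
	last := common.HexToHash("0x0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")
	return estimateRemainingSlots(4096, last) // ~61440, modulo integer rounding
}

3065 // Merge merges the pathsets, so that several storage requests concerning the
3066 // same account are merged into one, to reduce bandwidth.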
3067 // Note: this operation is moot unless t has been sorted first.
3068 func (t *healRequestSort) Merge() []TrieNodePathSet {
3069 var result []TrieNodePathSet
3070 for _, path := range t.syncPaths {
3071 pathset := TrieNodePathSet([][]byte(path))
3072 if len(path) == 1 {
3073 // It's an account reference.
3074 result = append(result, pathset)
3075 } else {
3076 // It's a storage reference.
3077 end := len(result) - 1
3078 if len(result) == 0 || !bytes.Equal(pathset[0], result[end][0]) {
3079 // The account doesn't match last, create a new entry.
3080 result = append(result, pathset)
3081 } else {
3082 // It's the same account as the previous one, add to the storage
3083 // paths of that request.
3084 result[end] = append(result[end], pathset[1])
3085 }
3086 }
3087 }
3088 return result
3089 }
3090
3091 // sortByAccountPath takes hashes and paths, and sorts them. After that, it generates
3092 // the TrieNodePaths and merges paths which belong to the same account path.
3093 func sortByAccountPath(paths []string, hashes []common.Hash) ([]string, []common.Hash, []trie.SyncPath, []TrieNodePathSet) {
3094 var syncPaths []trie.SyncPath
3095 for _, path := range paths {
3096 syncPaths = append(syncPaths, trie.NewSyncPath([]byte(path)))
3097 }
3098 n := &healRequestSort{paths, hashes, syncPaths}
3099 sort.Sort(n)
3100 pathsets := n.Merge()
3101 return n.paths, n.hashes, n.syncPaths, pathsets
3102 }
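// exampleMergePathsets demonstrates the effect of sortByAccountPath on a
// hypothetical heal request: two storage-trie paths below the same account
// (64 hex nibbles of account path plus one storage nibble each) collapse into
// a single TrieNodePathSet, so the account prefix is only sent once over the
// wire. Purely illustrative; the paths and hashes are made up.

func exampleMergePathsets() []TrieNodePathSet {
	var (
		account = string(bytes.Repeat([]byte{0x01}, 64)) // hex-nibble account path
		pathA   = account + string([]byte{0x02})         // two storage nodes below it
		pathB   = account + string([]byte{0x03})
	)
	paths := []string{pathB, pathA}
	hashes := []common.Hash{{0xbb}, {0xaa}}

	_, _, _, pathsets := sortByAccountPath(paths, hashes)
	// len(pathsets) == 1: a single set holding the account path plus both
	// storage paths, instead of two sets repeating the same account.
	return pathsets
}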