github.com/klaytn/klaytn@v1.12.1/datasync/downloader/downloader.go (about) 1 // Modifications Copyright 2018 The klaytn Authors 2 // Copyright 2015 The go-ethereum Authors 3 // This file is part of the go-ethereum library. 4 // 5 // The go-ethereum library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-ethereum library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 17 // 18 // This file is derived from eth/downloader/downloader.go (2018/06/04). 19 // Modified and improved for the klaytn development. 20 21 package downloader 22 23 import ( 24 "errors" 25 "fmt" 26 "math/big" 27 "sync" 28 "sync/atomic" 29 "time" 30 31 "github.com/klaytn/klaytn" 32 "github.com/klaytn/klaytn/blockchain/types" 33 "github.com/klaytn/klaytn/common" 34 "github.com/klaytn/klaytn/event" 35 "github.com/klaytn/klaytn/log" 36 "github.com/klaytn/klaytn/node/cn/snap" 37 "github.com/klaytn/klaytn/params" 38 "github.com/klaytn/klaytn/reward" 39 "github.com/klaytn/klaytn/snapshot" 40 "github.com/klaytn/klaytn/storage/database" 41 "github.com/klaytn/klaytn/storage/statedb" 42 "github.com/rcrowley/go-metrics" 43 ) 44 45 var ( 46 MaxHashFetch = 512 // Amount of hashes to be fetched per retrieval request 47 MaxBlockFetch = 128 // Amount of blocks to be fetched per retrieval request 48 MaxHeaderFetch = 192 // Amount of block headers to be fetched per retrieval request 49 MaxSkeletonSize = 128 // Number of header fetches to need for a skeleton assembly 50 MaxBodyFetch = 128 // Amount of block bodies to be fetched per retrieval request 51 MaxReceiptFetch = 256 // Amount of transaction receipts to allow fetching per request 52 MaxStakingInfoFetch = 128 // Amount of staking information to allow fetching per request 53 MaxStateFetch = 384 // Amount of node state values to allow fetching per request 54 55 MaxForkAncestry = 3 * params.EpochDuration // Maximum chain reorganisation 56 rttMinEstimate = 2 * time.Second // Minimum round-trip time to target for download requests 57 rttMaxEstimate = 20 * time.Second // Maximum round-trip time to target for download requests 58 rttMinConfidence = 0.1 // Worse confidence factor in our estimated RTT value 59 ttlScaling = 3 // Constant scaling factor for RTT -> TTL conversion 60 ttlLimit = time.Minute // Maximum TTL allowance to prevent reaching crazy timeouts 61 62 qosTuningPeers = 5 // Number of peers to tune based on (best peers) 63 qosConfidenceCap = 10 // Number of peers above which not to modify RTT confidence 64 qosTuningImpact = 0.25 // Impact that a new tuning target has on the previous value 65 66 maxQueuedHeaders = 32 * 1024 // [klay/62] Maximum number of headers to queue for import (DOS protection) 67 maxHeadersProcess = 2048 // Number of header download results to import at once into the chain 68 maxResultsProcess = 2048 // Number of content download results to import at once into the chain 69 70 fsHeaderCheckFrequency = 100 // Verification frequency of the downloaded headers during fast sync 71 
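// A rough sketch of how the QoS knobs above combine, assuming the qosTuner/requestTTL
// helpers later in this file follow the upstream go-ethereum formula they were derived from:
//
//	rtt        // current estimate, kept within [rttMinEstimate, rttMaxEstimate]
//	confidence // rttConfidence / 1,000,000; starts at 1.0 and is reduced as new peers register
//	ttl        // roughly ttlScaling * rtt / confidence, capped at ttlLimit
//
// e.g. a fully trusted 2s RTT estimate would yield a 6s request timeout, and no request
// is ever allowed more than one minute.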
fsHeaderSafetyNet = 2048 // Number of headers to discard in case a chain violation is detected 72 fsHeaderForceVerify = 24 // Number of headers to verify before and after the pivot to accept it 73 fsHeaderContCheck = 3 * time.Second // Time interval to check for header continuations during state download 74 fsMinFullBlocks = 64 // Number of blocks to retrieve fully even in fast sync 75 76 logger = log.NewModuleLogger(log.DatasyncDownloader) 77 ) 78 79 var ( 80 errBusy = errors.New("busy") 81 errUnknownPeer = errors.New("peer is unknown or unhealthy") 82 errBadPeer = errors.New("action from bad peer ignored") 83 errStallingPeer = errors.New("peer is stalling") 84 errNoPeers = errors.New("no peers to keep download active") 85 errTimeout = errors.New("timeout") 86 errEmptyHeaderSet = errors.New("empty header set by peer") 87 errPeersUnavailable = errors.New("no peers available or all tried for download") 88 errInvalidAncestor = errors.New("retrieved ancestor is invalid") 89 errInvalidChain = errors.New("retrieved hash chain is invalid") 90 errInvalidBody = errors.New("retrieved block body is invalid") 91 errInvalidReceipt = errors.New("retrieved receipt is invalid") 92 errCancelStateFetch = errors.New("state data download canceled (requested)") 93 errCancelContentProcessing = errors.New("content processing canceled (requested)") 94 errCanceled = errors.New("syncing canceled (requested)") 95 errNoSyncActive = errors.New("no sync active") 96 errTooOld = errors.New("peer doesn't speak recent enough protocol version (need version >= 62)") 97 ) 98 99 type Downloader struct { 100 mode uint32 // Synchronisation mode defining the strategy used (per sync cycle), use d.getMode() to get the SyncMode 101 mux *event.TypeMux // Event multiplexer to announce sync operation events 102 103 isStakingInfoRecovery bool 104 stakingInfoRecoveryTotal int 105 stakingInfoRecoveryCh chan []*reward.StakingInfo 106 stakingInfoRecoveryBlocks []uint64 107 108 queue *queue // Scheduler for selecting the hashes to download 109 peers *peerSet // Set of active peers from which download can proceed 110 111 stateDB database.DBManager // Database to state sync into (and deduplicate via) 112 stateBloom *statedb.SyncBloom // Bloom filter for fast trie node and contract code existence checks 113 114 rttEstimate uint64 // Round trip time to target for download requests 115 rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops) 116 117 // Statistics 118 syncStatsChainOrigin uint64 // Origin block number where syncing started at 119 syncStatsChainHeight uint64 // Highest block number known when syncing started 120 syncStatsState stateSyncStats 121 syncStatsLock sync.RWMutex // Lock protecting the sync stats fields 122 123 lightchain LightChain 124 blockchain BlockChain 125 126 // Callbacks 127 dropPeer peerDropFn // Drops a peer for misbehaving 128 129 // Status 130 synchroniseMock func(id string, hash common.Hash) error // Replacement for synchronise during testing 131 synchronising int32 132 notified int32 133 committed int32 134 135 // Channels 136 headerCh chan dataPack // [klay/62] Channel receiving inbound block headers 137 bodyCh chan dataPack // [klay/62] Channel receiving inbound block bodies 138 receiptCh chan dataPack // [klay/63] Channel receiving inbound receipts 139 stakingInfoCh chan dataPack // [klay/65] Channel receiving inbound staking infos 140 bodyWakeCh chan bool // [klay/62] Channel to signal the block body fetcher of new tasks 141 receiptWakeCh chan bool // [klay/63] 
Channel to signal the receipt fetcher of new tasks 142 stakingInfoWakeCh chan bool // [klay/65] Channel to signal the staking info fetcher of new tasks 143 headerProcCh chan []*types.Header // [klay/62] Channel to feed the header processor new tasks 144 145 // for snapsyncer 146 snapSync bool // Whether to run state sync over the snap protocol 147 SnapSyncer *snap.Syncer // TODO-Klaytn-Snapsyncer make private! hack for now 148 149 // for stateFetcher 150 pivotHeader *types.Header 151 pivotLock sync.RWMutex 152 153 stateSyncStart chan *stateSync 154 trackStateReq chan *stateReq 155 stateCh chan dataPack // [klay/63] Channel receiving inbound node state data 156 157 // Cancellation and termination 158 cancelPeer string // Identifier of the peer currently being used as the master (cancel on drop) 159 cancelCh chan struct{} // Channel to cancel mid-flight syncs 160 cancelLock sync.RWMutex // Lock to protect the cancel channel and peer in delivers 161 cancelWg sync.WaitGroup // Make sure all fetcher goroutines have exited. 162 163 quitCh chan struct{} // Quit channel to signal termination 164 quitLock sync.RWMutex // Lock to prevent double closes 165 166 // Testing hooks 167 syncInitHook func(uint64, uint64) // Method to call upon initiating a new sync run 168 bodyFetchHook func([]*types.Header) // Method to call upon starting a block body fetch 169 receiptFetchHook func([]*types.Header) // Method to call upon starting a receipt fetch 170 stakingInfoFetchHook func([]*types.Header) // Method to call upon starting a staking info fetch 171 chainInsertHook func([]*fetchResult) // Method to call upon inserting a chain of blocks (possibly in multiple invocations) 172 } 173 174 // LightChain encapsulates functions required to synchronise a light chain. 175 type LightChain interface { 176 // HasHeader verifies a header's presence in the local chain. 177 HasHeader(common.Hash, uint64) bool 178 179 // GetHeaderByHash retrieves a header from the local chain. 180 GetHeaderByHash(common.Hash) *types.Header 181 182 // CurrentHeader retrieves the head header from the local chain. 183 CurrentHeader() *types.Header 184 185 // GetTd returns the total blockscore of a local block. 186 GetTd(common.Hash, uint64) *big.Int 187 188 // InsertHeaderChain inserts a batch of headers into the local chain. 189 InsertHeaderChain([]*types.Header, int) (int, error) 190 191 // Rollback removes a few recently added elements from the local chain. 192 Rollback([]common.Hash) 193 } 194 195 // BlockChain encapsulates functions required to sync a (full or fast) blockchain. 196 type BlockChain interface { 197 LightChain 198 199 // HasBlock verifies a block's presence in the local chain. 200 HasBlock(common.Hash, uint64) bool 201 202 // GetBlockByHash retrieves a block from the local chain. 203 GetBlockByHash(common.Hash) *types.Block 204 205 // CurrentBlock retrieves the head block from the local chain. 206 CurrentBlock() *types.Block 207 208 // CurrentFastBlock retrieves the head fast block from the local chain. 209 CurrentFastBlock() *types.Block 210 211 // FastSyncCommitHead directly commits the head block to a certain entity. 212 FastSyncCommitHead(common.Hash) error 213 214 // InsertChain inserts a batch of blocks into the local chain. 215 InsertChain(types.Blocks) (int, error) 216 217 // InsertReceiptChain inserts a batch of receipts into the local chain. 218 InsertReceiptChain(types.Blocks, []types.Receipts) (int, error) 219 220 // Snapshots returns the blockchain snapshot tree. 
221 Snapshots() *snapshot.Tree 222 } 223 224 // New creates a new downloader to fetch hashes and blocks from remote peers. 225 func New(mode SyncMode, stateDB database.DBManager, stateBloom *statedb.SyncBloom, mux *event.TypeMux, chain BlockChain, lightchain LightChain, dropPeer peerDropFn, proposerPolicy uint64) *Downloader { 226 if lightchain == nil { 227 lightchain = chain 228 } 229 230 dl := &Downloader{ 231 mode: uint32(mode), 232 stateDB: stateDB, 233 stateBloom: stateBloom, 234 mux: mux, 235 isStakingInfoRecovery: false, 236 stakingInfoRecoveryBlocks: []uint64{}, 237 queue: newQueue(blockCacheMaxItems, blockCacheInitialItems, proposerPolicy), 238 peers: newPeerSet(), 239 rttEstimate: uint64(rttMaxEstimate), 240 rttConfidence: uint64(1000000), 241 blockchain: chain, 242 lightchain: lightchain, 243 dropPeer: dropPeer, 244 headerCh: make(chan dataPack, 1), 245 bodyCh: make(chan dataPack, 1), 246 receiptCh: make(chan dataPack, 1), 247 stakingInfoCh: make(chan dataPack, 1), 248 bodyWakeCh: make(chan bool, 1), 249 receiptWakeCh: make(chan bool, 1), 250 stakingInfoWakeCh: make(chan bool, 1), 251 headerProcCh: make(chan []*types.Header, 1), 252 quitCh: make(chan struct{}), 253 stateCh: make(chan dataPack), 254 SnapSyncer: snap.NewSyncer(stateDB), 255 stateSyncStart: make(chan *stateSync), 256 syncStatsState: stateSyncStats{ 257 processed: stateDB.ReadFastTrieProgress(), 258 }, 259 trackStateReq: make(chan *stateReq), 260 } 261 go dl.qosTuner() 262 go dl.stateFetcher() 263 return dl 264 } 265 266 // Progress retrieves the synchronisation boundaries, specifically the origin 267 // block where synchronisation started at (may have failed/suspended); the block 268 // or header sync is currently at; and the latest known block which the sync targets. 269 // 270 // In addition, during the state download phase of fast synchronisation the number 271 // of processed and the total number of known states are also returned. Otherwise 272 // these are zero. 273 func (d *Downloader) Progress() klaytn.SyncProgress { 274 // Lock the current stats and return the progress 275 d.syncStatsLock.RLock() 276 defer d.syncStatsLock.RUnlock() 277 278 current := uint64(0) 279 mode := d.getMode() 280 switch mode { 281 case FullSync: 282 current = d.blockchain.CurrentBlock().NumberU64() 283 case SnapSync: 284 current = d.blockchain.CurrentFastBlock().NumberU64() 285 case FastSync: 286 current = d.blockchain.CurrentFastBlock().NumberU64() 287 case LightSync: 288 current = d.lightchain.CurrentHeader().Number.Uint64() 289 } 290 return klaytn.SyncProgress{ 291 StartingBlock: d.syncStatsChainOrigin, 292 CurrentBlock: current, 293 HighestBlock: d.syncStatsChainHeight, 294 PulledStates: d.syncStatsState.processed, 295 KnownStates: d.syncStatsState.processed + d.syncStatsState.pending, 296 } 297 } 298 299 func (d *Downloader) getMode() SyncMode { 300 return SyncMode(atomic.LoadUint32(&d.mode)) 301 } 302 303 // Synchronising returns whether the downloader is currently retrieving blocks. 304 func (d *Downloader) Synchronising() bool { 305 return atomic.LoadInt32(&d.synchronising) > 0 306 } 307 308 // RegisterPeer injects a new download peer into the set of block source to be 309 // used for fetching hashes and blocks from. 
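// A minimal lifecycle sketch using only the exported API in this file; the peer, head
// hash, TD, database and event-mux values normally supplied by the protocol manager are
// assumed placeholders here:
//
//	d := New(FullSync, db, nil, mux, chain, nil, dropPeer, proposerPolicy)
//	_ = d.RegisterPeer(peerID, version, peer) // as peers connect
//	go func() { _ = d.Synchronise(peerID, head, td, FullSync) }()
//	progress := d.Progress()                  // poll for syncing-status APIs
//	d.Terminate()                             // on shutdown
//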
310 func (d *Downloader) RegisterPeer(id string, version int, peer Peer) error { 311 localLogger := logger.NewWith("peer", id) 312 localLogger.Trace("Registering sync peer") 313 if err := d.peers.Register(newPeerConnection(id, version, peer, localLogger)); err != nil { 314 localLogger.Error("Failed to register sync peer", "err", err) 315 return err 316 } 317 d.qosReduceConfidence() 318 319 return nil 320 } 321 322 // RegisterLightPeer injects a light client peer, wrapping it so it appears as a regular peer. 323 func (d *Downloader) RegisterLightPeer(id string, version int, peer LightPeer) error { 324 return d.RegisterPeer(id, version, &lightPeerWrapper{peer}) 325 } 326 327 // UnregisterPeer remove a peer from the known list, preventing any action from 328 // the specified peer. An effort is also made to return any pending fetches into 329 // the queue. 330 func (d *Downloader) UnregisterPeer(id string) error { 331 // Unregister the peer from the active peer set and revoke any fetch tasks 332 localLogger := logger.NewWith("peer", id) 333 localLogger.Trace("Unregistering sync peer") 334 if err := d.peers.Unregister(id); err != nil { 335 localLogger.Error("Failed to unregister sync peer", "err", err) 336 return err 337 } 338 d.queue.Revoke(id) 339 340 return nil 341 } 342 343 func (d *Downloader) GetSnapSyncer() *snap.Syncer { 344 return d.SnapSyncer 345 } 346 347 // Synchronise tries to sync up our local block chain with a remote peer, both 348 // adding various sanity checks as well as wrapping it with various logger entries. 349 func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int, mode SyncMode) error { 350 err := d.synchronise(id, head, td, mode) 351 switch err { 352 case nil, errBusy, errCanceled: 353 return err 354 } 355 356 if errors.Is(err, errInvalidChain) { 357 logger.Warn("Synchronisation failed, dropping peer", "peer", id, "err", err) 358 if d.dropPeer == nil { 359 logger.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", id) 360 } else { 361 d.dropPeer(id) 362 } 363 return err 364 } 365 366 switch err { 367 case errTimeout, errBadPeer, errStallingPeer, 368 errEmptyHeaderSet, errPeersUnavailable, errTooOld, 369 errInvalidAncestor: 370 logger.Warn("Synchronisation failed, dropping peer", "peer", id, "err", err) 371 if d.dropPeer == nil { 372 logger.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", id) 373 } else { 374 d.dropPeer(id) 375 } 376 377 default: 378 logger.Warn("Synchronisation failed, retrying", "err", err) 379 } 380 return err 381 } 382 383 // synchronise will select the peer and use it for synchronising. If an empty string is given 384 // it will use the best peer possible and synchronize if its TD is higher than our own. If any of the 385 // checks fail an error will be returned. This method is synchronous 386 func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode SyncMode) error { 387 // Mock out the synchronisation if testing 388 if d.synchroniseMock != nil { 389 return d.synchroniseMock(id, hash) 390 } 391 // TODO-Klaytn-Downloader Below code can be replaced by mutex. 
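// A minimal sketch of the mutex-based alternative the TODO above refers to, assuming a
// hypothetical sync.Mutex field on Downloader (say d.syncingMu) and Go 1.18+ for TryLock;
// the atomic CompareAndSwap guard below is otherwise equivalent:
//
//	if !d.syncingMu.TryLock() {
//		return errBusy
//	}
//	defer d.syncingMu.Unlock()
//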
392 // Make sure only one goroutine is ever allowed past this point at once 393 if !atomic.CompareAndSwapInt32(&d.synchronising, 0, 1) { 394 return errBusy 395 } 396 defer atomic.StoreInt32(&d.synchronising, 0) 397 398 // Post a user notification of the sync (only once per session) 399 if atomic.CompareAndSwapInt32(&d.notified, 0, 1) { 400 logger.Info("Block synchronisation started") 401 } 402 // If we are already full syncing, but have a fast-sync bloom filter laying 403 // around, make sure it does't use memory any more. This is a special case 404 // when the user attempts to fast sync a new empty network. 405 if mode == FullSync && d.stateBloom != nil { 406 d.stateBloom.Close() 407 } 408 409 if mode == SnapSync { 410 if !d.snapSync { 411 // Snap sync uses the snapshot namespace to store potentially flakey data until 412 // sync completely heals and finishes. Pause snapshot maintenance in the mean 413 // time to prevent access. 414 if snapshots := d.blockchain.Snapshots(); snapshots != nil { // Only nil in tests 415 snapshots.Disable() 416 logger.Warn("State snapshot is disabled") 417 } 418 logger.Warn("Enabling snapshot sync prototype") 419 d.snapSync = true 420 } 421 } 422 // Reset the queue, peer set and wake channels to clean any internal leftover state 423 d.queue.Reset(blockCacheMaxItems, blockCacheInitialItems) 424 d.peers.Reset() 425 426 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stakingInfoWakeCh} { 427 select { 428 case <-ch: 429 default: 430 } 431 } 432 for _, ch := range []chan dataPack{d.headerCh, d.bodyCh, d.receiptCh, d.stakingInfoCh} { 433 for empty := false; !empty; { 434 select { 435 case <-ch: 436 default: 437 empty = true 438 } 439 } 440 } 441 for empty := false; !empty; { 442 select { 443 case <-d.headerProcCh: 444 default: 445 empty = true 446 } 447 } 448 // Create cancel channel for aborting mid-flight and mark the master peer 449 d.cancelLock.Lock() 450 d.cancelCh = make(chan struct{}) 451 d.cancelPeer = id 452 d.cancelLock.Unlock() 453 454 defer d.Cancel() // No matter what, we can't leave the cancel channel open 455 456 // Atomically set the requested sync mode 457 atomic.StoreUint32(&d.mode, uint32(mode)) 458 459 // Retrieve the origin peer and initiate the downloading process 460 p := d.peers.Peer(id) 461 if p == nil { 462 return errUnknownPeer 463 } 464 return d.syncWithPeer(p, hash, td) 465 } 466 467 // syncWithPeer starts a block synchronization based on the hash chain from the 468 // specified peer and head hash. 469 func (d *Downloader) syncWithPeer(p *peerConnection, hash common.Hash, td *big.Int) (err error) { 470 d.mux.Post(StartEvent{}) 471 defer func() { 472 // reset on error 473 if err != nil { 474 d.mux.Post(FailedEvent{err}) 475 } else { 476 d.mux.Post(DoneEvent{}) 477 } 478 }() 479 if p.version < 62 { 480 return errTooOld 481 } 482 mode := d.getMode() 483 484 logger.Debug("Synchronising with the network", "peer", p.id, "klay", p.version, "head", hash, "td", td, "mode", mode) 485 defer func(start time.Time) { 486 logger.Debug("Synchronisation terminated", "elapsed", time.Since(start)) 487 }(time.Now()) 488 489 // Look up the sync boundaries: the common ancestor and the target block 490 latest, pivot, err := d.fetchHead(p) 491 if err != nil { 492 return err 493 } 494 if (mode == FastSync || mode == SnapSync) && pivot == nil { 495 // If no pivot block was returned, the head is below the min full block 496 // threshold (i.e. new chain). 
In that case we won't really fast sync 497 // anyway, but still need a valid pivot block to avoid some code hitting 498 // nil panics on an access. 499 pivot = d.blockchain.CurrentBlock().Header() 500 } 501 height := latest.Number.Uint64() 502 503 origin, err := d.findAncestor(p, height) 504 if err != nil { 505 return err 506 } 507 d.syncStatsLock.Lock() 508 if d.syncStatsChainHeight <= origin || d.syncStatsChainOrigin > origin { 509 d.syncStatsChainOrigin = origin 510 } 511 d.syncStatsChainHeight = height 512 d.syncStatsLock.Unlock() 513 514 // Ensure our origin point is below any fast sync pivot point 515 if mode == FastSync || mode == SnapSync { 516 if height <= uint64(fsMinFullBlocks) { 517 origin = 0 518 } else { 519 pivotNumber := pivot.Number.Uint64() 520 if pivotNumber <= origin { 521 origin = pivotNumber - 1 522 } 523 } 524 } 525 d.committed = 1 526 if (mode == FastSync || mode == SnapSync) && pivot.Number.Uint64() != 0 { 527 d.committed = 0 528 } 529 // Initiate the sync using a concurrent header and content retrieval algorithm 530 d.queue.Prepare(origin+1, mode) 531 if d.syncInitHook != nil { 532 d.syncInitHook(origin, height) 533 } 534 535 fetchers := []func() error{ 536 func() error { return d.fetchHeaders(p, origin+1) }, // Headers are always retrieved 537 func() error { return d.fetchBodies(origin + 1) }, // Bodies are retrieved during normal and fast sync 538 func() error { return d.fetchReceipts(origin + 1) }, // Receipts are retrieved during fast sync 539 func() error { return d.fetchStakingInfos(origin + 1) }, // StakingInfos are retrieved during fast sync 540 func() error { return d.processHeaders(origin+1, td) }, 541 } 542 if mode == FastSync || mode == SnapSync { 543 d.pivotLock.Lock() 544 d.pivotHeader = pivot 545 d.pivotLock.Unlock() 546 fetchers = append(fetchers, func() error { return d.processFastSyncContent() }) 547 } else if mode == FullSync { 548 fetchers = append(fetchers, d.processFullSyncContent) 549 } 550 return d.spawnSync(fetchers, p.id) 551 } 552 553 // spawnSync runs d.process and all given fetcher functions to completion in 554 // separate goroutines, returning the first error that appears. 555 func (d *Downloader) spawnSync(fetchers []func() error, peerID string) error { 556 errc := make(chan error, len(fetchers)) 557 d.cancelWg.Add(len(fetchers)) 558 559 logger.Debug("spawnSync started", "peerID", peerID) 560 561 for _, fn := range fetchers { 562 fn := fn 563 go func() { defer d.cancelWg.Done(); errc <- fn() }() 564 } 565 // Wait for the first error, then terminate the others. 566 var err error 567 for i := 0; i < len(fetchers); i++ { 568 if i == len(fetchers)-1 { 569 // Close the queue when all fetchers have exited. 570 // This will cause the block processor to end when 571 // it has processed the queue. 
572 d.queue.Close() 573 } 574 if err = <-errc; err != nil && err != errCanceled { 575 break 576 } 577 } 578 579 d.queue.Close() 580 d.Cancel() 581 582 logger.Debug("spawnSync terminated", "peerID", peerID) 583 584 return err 585 } 586 587 func (d *Downloader) SyncStakingInfo(id string, from, to uint64) error { 588 if d.isStakingInfoRecovery { 589 return errors.New("already syncing") 590 } 591 logger.Info("start syncing staking infos", "from", from, "to", to) 592 d.isStakingInfoRecovery = true 593 594 var ( 595 blockNums []uint64 596 blockHashes []common.Hash 597 ) 598 from = params.CalcStakingBlockNumber(from) 599 for i := from; i <= to; i += params.StakingUpdateInterval() { 600 blockHash := d.stateDB.ReadCanonicalHash(i) 601 if blockHash == (common.Hash{}) { 602 d.isStakingInfoRecovery = false 603 return fmt.Errorf("failed to retrieve block hash by number (blockNumber: %v)", i) 604 } 605 has, err := reward.HasStakingInfoFromDB(i) 606 if err != nil { 607 d.isStakingInfoRecovery = false 608 return err 609 } 610 if !has { 611 blockNums = append(blockNums, i) 612 blockHashes = append(blockHashes, blockHash) 613 } 614 } 615 616 if len(blockNums) == 0 && len(blockHashes) == 0 { 617 d.isStakingInfoRecovery = false 618 return fmt.Errorf("there is no staking info to be synced") 619 } 620 621 conn := d.peers.Peer(id) 622 if conn == nil { 623 d.isStakingInfoRecovery = false 624 return errors.New("the given peer is not registered") 625 } 626 627 d.stakingInfoRecoveryBlocks = blockNums 628 d.stakingInfoRecoveryTotal = len(blockNums) 629 d.stakingInfoRecoveryCh = make(chan []*reward.StakingInfo, 1) 630 631 go func() { 632 defer func() { 633 d.isStakingInfoRecovery = false 634 d.stakingInfoRecoveryBlocks = []uint64{} 635 d.stakingInfoRecoveryTotal = 0 636 }() 637 638 fixed := 0 639 for { 640 timer := time.NewTimer(30 * time.Second) 641 if len(blockHashes) > MaxStakingInfoFetch { 642 go conn.peer.RequestStakingInfo(blockHashes[:MaxStakingInfoFetch]) 643 logger.Info("requested staking infos", "num", MaxStakingInfoFetch) 644 blockHashes = blockHashes[MaxStakingInfoFetch:] 645 } else if len(blockHashes) > 0 { 646 go conn.peer.RequestStakingInfo(blockHashes) 647 logger.Info("requested staking infos", "num", len(blockHashes)) 648 blockHashes = []common.Hash{} 649 } else { 650 logger.Error("no more requests, but not completed", "not completed", len(d.stakingInfoRecoveryBlocks)) 651 return 652 } 653 654 select { 655 case <-timer.C: 656 logger.Error("timeout") 657 return 658 case stakingInfos := <-d.stakingInfoRecoveryCh: 659 logger.Info("received stakinginfos", "len", len(stakingInfos)) 660 for _, stakingInfo := range stakingInfos { 661 if d.stakingInfoRecoveryBlocks[0] != stakingInfo.BlockNum { 662 logger.Error("failed to receive expected block", "expected", d.stakingInfoRecoveryBlocks[0], "actual", stakingInfo.BlockNum) 663 return 664 } 665 666 if err := reward.AddStakingInfoToDB(stakingInfo); err != nil { 667 logger.Error("failed to add staking info", "fixed", fixed, "stakingInfo", stakingInfo, "err", err) 668 return 669 } 670 fixed++ 671 d.stakingInfoRecoveryBlocks = d.stakingInfoRecoveryBlocks[1:] 672 } 673 674 if len(d.stakingInfoRecoveryBlocks) == 0 { 675 logger.Info("syncing staking info is finished", "fixed", fixed) 676 return 677 } 678 } 679 } 680 }() 681 return nil 682 } 683 684 type SyncingStatus struct { 685 Syncing bool `json:"syncing"` 686 Pending []uint64 `json:"pending"` 687 Total int `json:"total"` 688 Completed int `json:"completed"` 689 } 690 691 func (d *Downloader) SyncStakingInfoStatus() 
*SyncingStatus { 692 return &SyncingStatus{ 693 Syncing: d.isStakingInfoRecovery, 694 Pending: d.stakingInfoRecoveryBlocks, 695 Total: d.stakingInfoRecoveryTotal, 696 Completed: d.stakingInfoRecoveryTotal - len(d.stakingInfoRecoveryBlocks), 697 } 698 } 699 700 // cancel aborts all of the operations and resets the queue. However, cancel does 701 // not wait for the running download goroutines to finish. This method should be 702 // used when cancelling the downloads from inside the downloader. 703 func (d *Downloader) cancel() { 704 // Close the current cancel channel 705 d.cancelLock.Lock() 706 if d.cancelCh != nil { 707 select { 708 case <-d.cancelCh: 709 // Channel was already closed 710 default: 711 close(d.cancelCh) 712 } 713 } 714 d.cancelLock.Unlock() 715 } 716 717 // Cancel aborts all of the operations and waits for all download goroutines to 718 // finish before returning. 719 func (d *Downloader) Cancel() { 720 d.cancel() 721 d.cancelWg.Wait() 722 } 723 724 // Terminate interrupts the downloader, canceling all pending operations. 725 // The downloader cannot be reused after calling Terminate. 726 func (d *Downloader) Terminate() { 727 // Close the termination channel (make sure double close is allowed) 728 d.quitLock.Lock() 729 select { 730 case <-d.quitCh: 731 default: 732 close(d.quitCh) 733 } 734 d.quitLock.Unlock() 735 736 // Cancel any pending download requests 737 d.Cancel() 738 } 739 740 // fetchHead retrieves the head header and prior pivot header (if available) from 741 // a remote peer. 742 func (d *Downloader) fetchHead(p *peerConnection) (head *types.Header, pivot *types.Header, err error) { 743 p.logger.Debug("Retrieving remote chain height") 744 mode := d.getMode() 745 746 // Request the advertised remote head block and wait for the response 747 latest, _ := p.peer.Head() 748 fetch := 1 749 if mode == FastSync || mode == SnapSync { 750 fetch = 2 // head + pivot headers 751 } 752 go p.peer.RequestHeadersByHash(latest, fetch, fsMinFullBlocks-1, true) 753 754 ttl := d.requestTTL() 755 timeout := time.After(ttl) 756 for { 757 select { 758 case <-d.cancelCh: 759 return nil, nil, errCanceled 760 761 case packet := <-d.headerCh: 762 // Discard anything not from the origin peer 763 if packet.PeerId() != p.id { 764 logger.Debug("Received headers from incorrect peer", "peer", packet.PeerId()) 765 break 766 } 767 // Make sure the peer gave us at least one and at most the requested headers 768 headers := packet.(*headerPack).headers 769 if len(headers) == 0 || len(headers) > fetch { 770 return nil, nil, fmt.Errorf("%w: returned headers %d != requested %d", errBadPeer, len(headers), fetch) 771 } 772 // The first header needs to be the head, validate against the checkpoint 773 // and request. If only 1 header was returned, make sure there's no pivot 774 // or there was not one requested. 775 head := headers[0] 776 if len(headers) == 1 { 777 if (mode == FastSync || mode == SnapSync) && head.Number.Uint64() > uint64(fsMinFullBlocks) { 778 return nil, nil, fmt.Errorf("%w: no pivot included along head header", errBadPeer) 779 } 780 p.logger.Debug("Remote head identified, no pivot", "number", head.Number, "hash", head.Hash()) 781 return head, nil, nil 782 } 783 // At this point we have 2 headers in total and the first is the 784 // validated head of the chain. Check the pivot number and return. 
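// Worked example of the geometry checked here: the request above asked for two headers
// starting at the advertised head and walking backwards with a skip of fsMinFullBlocks-1
// (63), so the second header must sit exactly fsMinFullBlocks (64) blocks below the head.
// For a remote head at block 10,000 the peer must therefore return the header of block
// 9,936 alongside it, which is what the comparison below enforces.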
785 pivot := headers[1] 786 if pivot.Number.Uint64() != head.Number.Uint64()-uint64(fsMinFullBlocks) { 787 return nil, nil, fmt.Errorf("%w: remote pivot %d != requested %d", errInvalidChain, pivot.Number, head.Number.Uint64()-uint64(fsMinFullBlocks)) 788 } 789 return head, pivot, nil 790 791 case <-timeout: 792 p.logger.Debug("Waiting for head header timed out", "elapsed", ttl) 793 return nil, nil, errTimeout 794 795 case <-d.bodyCh: 796 case <-d.receiptCh: 797 case <-d.stakingInfoCh: 798 // Out of bounds delivery, ignore 799 } 800 } 801 } 802 803 // findAncestor tries to locate the common ancestor link of the local chain and 804 // a remote peers blockchain. In the general case when our node was in sync and 805 // on the correct chain, checking the top N links should already get us a match. 806 // In the rare scenario when we ended up on a long reorganisation (i.e. none of 807 // the head links match), we do a binary search to find the common ancestor. 808 func (d *Downloader) findAncestor(p *peerConnection, height uint64) (uint64, error) { 809 // Figure out the valid ancestor range to prevent rewrite attacks 810 floor, ceil := int64(-1), d.lightchain.CurrentHeader().Number.Uint64() 811 812 mode := d.getMode() 813 if mode == FullSync { 814 ceil = d.blockchain.CurrentBlock().NumberU64() 815 } else if mode == FastSync || mode == SnapSync { 816 ceil = d.blockchain.CurrentFastBlock().NumberU64() 817 } 818 if ceil >= MaxForkAncestry { 819 floor = int64(ceil - MaxForkAncestry) 820 } 821 p.logger.Debug("Looking for common ancestor", "local", ceil, "remote", height) 822 823 // Request the topmost blocks to short circuit binary ancestor lookup 824 head := ceil 825 if head > height { 826 head = height 827 } 828 from := int64(head) - int64(MaxHeaderFetch) 829 if from < 0 { 830 from = 0 831 } 832 // Span out with 15 block gaps into the future to catch bad head reports 833 limit := 2 * MaxHeaderFetch / 16 834 count := 1 + int((int64(ceil)-from)/16) 835 if count > limit { 836 count = limit 837 } 838 go p.peer.RequestHeadersByNumber(uint64(from), count, 15, false) 839 840 // Wait for the remote response to the head fetch 841 number, hash := uint64(0), common.Hash{} 842 843 ttl := d.requestTTL() 844 timeout := time.After(ttl) 845 846 for finished := false; !finished; { 847 select { 848 case <-d.cancelCh: 849 return 0, errCanceled 850 851 case packet := <-d.headerCh: 852 // Discard anything not from the origin peer 853 if packet.PeerId() != p.id { 854 logger.Debug("Received headers from incorrect peer", "peer", packet.PeerId()) 855 break 856 } 857 // Make sure the peer actually gave something valid 858 headers := packet.(*headerPack).headers 859 if len(headers) == 0 { 860 p.logger.Error("Empty head header set") 861 return 0, errEmptyHeaderSet 862 } 863 // Make sure the peer's reply conforms to the request 864 for i := 0; i < len(headers); i++ { 865 if number := headers[i].Number.Int64(); number != from+int64(i)*16 { 866 p.logger.Error("Head headers broke chain ordering", "index", i, "requested", from+int64(i)*16, "received", number) 867 return 0, fmt.Errorf("%w: %v", errInvalidChain, errors.New("head headers broke chain ordering")) 868 } 869 } 870 // Check if a common ancestor was found 871 finished = true 872 for i := len(headers) - 1; i >= 0; i-- { 873 // Skip any headers that underflow/overflow our requested set 874 if headers[i].Number.Int64() < from || headers[i].Number.Uint64() > ceil { 875 continue 876 } 877 // Otherwise check if we already know the header or not 878 if (mode == FullSync && 
d.blockchain.HasBlock(headers[i].Hash(), headers[i].Number.Uint64())) || (mode != FullSync && d.lightchain.HasHeader(headers[i].Hash(), headers[i].Number.Uint64())) { 879 number, hash = headers[i].Number.Uint64(), headers[i].Hash() 880 881 // If every header is known, even future ones, the peer straight out lied about its head 882 if number > height && i == limit-1 { 883 p.logger.Error("Lied about chain head", "reported", height, "found", number) 884 return 0, errStallingPeer 885 } 886 break 887 } 888 } 889 890 case <-timeout: 891 p.logger.Debug("Waiting for head header timed out", "elapsed", ttl) 892 return 0, errTimeout 893 894 case <-d.bodyCh: 895 case <-d.receiptCh: 896 case <-d.stakingInfoCh: 897 // Out of bounds delivery, ignore 898 } 899 } 900 // If the head fetch already found an ancestor, return 901 if !common.EmptyHash(hash) { 902 if int64(number) <= floor { 903 p.logger.Error("Ancestor below allowance", "number", number, "hash", hash, "allowance", floor) 904 return 0, errInvalidAncestor 905 } 906 p.logger.Debug("Found common ancestor", "number", number, "hash", hash) 907 return number, nil 908 } 909 // Ancestor not found, we need to binary search over our chain 910 start, end := uint64(0), head 911 if floor > 0 { 912 start = uint64(floor) 913 } 914 for start+1 < end { 915 // Split our chain interval in two, and request the hash to cross check 916 check := (start + end) / 2 917 918 ttl := d.requestTTL() 919 timeout := time.After(ttl) 920 921 go p.peer.RequestHeadersByNumber(check, 1, 0, false) 922 923 // Wait until a reply arrives to this request 924 for arrived := false; !arrived; { 925 select { 926 case <-d.cancelCh: 927 return 0, errCanceled 928 929 case packer := <-d.headerCh: 930 // Discard anything not from the origin peer 931 if packer.PeerId() != p.id { 932 logger.Debug("Received headers from incorrect peer", "peer", packer.PeerId()) 933 break 934 } 935 // Make sure the peer actually gave something valid 936 headers := packer.(*headerPack).headers 937 if len(headers) != 1 { 938 p.logger.Debug("Multiple headers for single request", "headers", len(headers)) 939 return 0, errBadPeer 940 } 941 arrived = true 942 943 // Modify the search interval based on the response 944 if (mode == FullSync && !d.blockchain.HasBlock(headers[0].Hash(), headers[0].Number.Uint64())) || (mode != FullSync && !d.lightchain.HasHeader(headers[0].Hash(), headers[0].Number.Uint64())) { 945 end = check 946 break 947 } 948 header := d.lightchain.GetHeaderByHash(headers[0].Hash()) // Independent of sync mode, header surely exists 949 if header.Number.Uint64() != check { 950 p.logger.Debug("Received non requested header", "number", header.Number, "hash", header.Hash(), "request", check) 951 return 0, errBadPeer 952 } 953 start = check 954 955 case <-timeout: 956 p.logger.Debug("Waiting for search header timed out", "elapsed", ttl) 957 return 0, errTimeout 958 959 case <-d.bodyCh: 960 case <-d.receiptCh: 961 case <-d.stakingInfoCh: 962 // Out of bounds delivery, ignore 963 } 964 } 965 } 966 // Ensure valid ancestry and return 967 if int64(start) <= floor { 968 p.logger.Warn("Ancestor below allowance", "number", start, "hash", hash, "allowance", floor) 969 return 0, errInvalidAncestor 970 } 971 p.logger.Debug("Found common ancestor", "number", start, "hash", hash) 972 return start, nil 973 } 974 975 // fetchHeaders keeps retrieving headers concurrently from the number 976 // requested, until no more are returned, potentially throttling on the way. 
To 977 // facilitate concurrency but still protect against malicious nodes sending bad 978 // headers, we construct a header chain skeleton using the "origin" peer we are 979 // syncing with, and fill in the missing headers using anyone else. Headers from 980 // other peers are only accepted if they map cleanly to the skeleton. If no one 981 // can fill in the skeleton - not even the origin peer - it's assumed invalid and 982 // the origin is dropped. 983 func (d *Downloader) fetchHeaders(p *peerConnection, from uint64) error { 984 p.logger.Debug("Directing header downloads", "origin", from) 985 defer func(start time.Time) { 986 p.logger.Debug("Header download terminated", "elapsed", time.Since(start)) 987 }(time.Now()) 988 989 // Create a timeout timer, and the associated header fetcher 990 skeleton := true // Skeleton assembly phase or finishing up 991 pivoting := false 992 request := time.Now() // time of the last skeleton fetch request 993 timeout := time.NewTimer(0) // timer to dump a non-responsive active peer 994 <-timeout.C // timeout channel should be initially empty 995 defer timeout.Stop() 996 997 var ttl time.Duration 998 getHeaders := func(from uint64) { 999 request = time.Now() 1000 1001 ttl = d.requestTTL() 1002 timeout.Reset(ttl) 1003 1004 if skeleton { 1005 p.logger.Trace("Fetching skeleton headers", "count", MaxHeaderFetch, "from", from) 1006 go p.peer.RequestHeadersByNumber(from+uint64(MaxHeaderFetch)-1, MaxSkeletonSize, MaxHeaderFetch-1, false) 1007 } else { 1008 p.logger.Trace("Fetching full headers", "count", MaxHeaderFetch, "from", from) 1009 go p.peer.RequestHeadersByNumber(from, MaxHeaderFetch, 0, false) 1010 } 1011 } 1012 getNextPivot := func() { 1013 pivoting = true 1014 request = time.Now() 1015 1016 ttl = d.requestTTL() 1017 timeout.Reset(ttl) 1018 1019 d.pivotLock.RLock() 1020 pivotNumber := d.pivotHeader.Number.Uint64() 1021 d.pivotLock.RUnlock() 1022 1023 p.logger.Trace("Fetching next pivot header", "number", pivotNumber+uint64(fsMinFullBlocks)) 1024 go p.peer.RequestHeadersByNumber(pivotNumber+uint64(fsMinFullBlocks), 2, fsMinFullBlocks-9, false) // move +64 when it's 2x64-8 deep 1025 } 1026 // Start pulling the header chain skeleton until all is done 1027 getHeaders(from) 1028 1029 for { 1030 select { 1031 case <-d.cancelCh: 1032 return errCanceled 1033 1034 case packet := <-d.headerCh: 1035 // Make sure the active peer is giving us the skeleton headers 1036 if packet.PeerId() != p.id { 1037 logger.Debug("Received skeleton from incorrect peer", "peer", packet.PeerId()) 1038 break 1039 } 1040 headerReqTimer.Update(time.Since(request)) 1041 timeout.Stop() 1042 1043 // If the pivot is being checked, move if it became stale and run the real retrieval 1044 var pivot uint64 1045 1046 d.pivotLock.RLock() 1047 if d.pivotHeader != nil { 1048 pivot = d.pivotHeader.Number.Uint64() 1049 } 1050 d.pivotLock.RUnlock() 1051 1052 if pivoting { 1053 if packet.Items() == 2 { 1054 // Retrieve the headers and do some sanity checks, just in case 1055 headers := packet.(*headerPack).headers 1056 1057 if have, want := headers[0].Number.Uint64(), pivot+uint64(fsMinFullBlocks); have != want { 1058 logger.Warn("Peer sent invalid next pivot", "have", have, "want", want) 1059 return fmt.Errorf("%w: next pivot number %d != requested %d", errInvalidChain, have, want) 1060 } 1061 if have, want := headers[1].Number.Uint64(), pivot+2*uint64(fsMinFullBlocks)-8; have != want { 1062 logger.Warn("Peer sent invalid pivot confirmer", "have", have, "want", want) 1063 return fmt.Errorf("%w: next 
pivot confirmer number %d != requested %d", errInvalidChain, have, want) 1064 } 1065 logger.Warn("Pivot seemingly stale, moving", "old", pivot, "new", headers[0].Number) 1066 pivot = headers[0].Number.Uint64() 1067 d.pivotLock.Lock() 1068 d.pivotHeader = headers[0] 1069 d.pivotLock.Unlock() 1070 1071 } 1072 pivoting = false 1073 getHeaders(from) 1074 continue 1075 } 1076 1077 // If the skeleton's finished, pull any remaining head headers directly from the origin 1078 if skeleton && packet.Items() == 0 { 1079 skeleton = false 1080 getHeaders(from) 1081 continue 1082 } 1083 // If no more headers are inbound, notify the content fetchers and return 1084 if packet.Items() == 0 { 1085 // Don't abort header fetches while the pivot is downloading 1086 if atomic.LoadInt32(&d.committed) == 0 && pivot <= from { 1087 p.logger.Debug("No headers, waiting for pivot commit") 1088 select { 1089 case <-time.After(fsHeaderContCheck): 1090 getHeaders(from) 1091 continue 1092 case <-d.cancelCh: 1093 return errCanceled 1094 } 1095 } 1096 // Pivot done (or not in fast sync) and no more headers, terminate the process 1097 p.logger.Debug("No more headers available") 1098 select { 1099 case d.headerProcCh <- nil: 1100 return nil 1101 case <-d.cancelCh: 1102 return errCanceled 1103 } 1104 } 1105 headers := packet.(*headerPack).headers 1106 1107 // If we received a skeleton batch, resolve internals concurrently 1108 if skeleton { 1109 filled, proced, err := d.fillHeaderSkeleton(from, headers) 1110 if err != nil { 1111 p.logger.Debug("Skeleton chain invalid", "err", err) 1112 return fmt.Errorf("%w: %v", errInvalidChain, err) 1113 } 1114 headers = filled[proced:] 1115 from += uint64(proced) 1116 } 1117 // Insert all the new headers and fetch the next batch 1118 if len(headers) > 0 { 1119 p.logger.Trace("Scheduling new headers", "count", len(headers), "from", from) 1120 select { 1121 case d.headerProcCh <- headers: 1122 case <-d.cancelCh: 1123 return errCanceled 1124 } 1125 from += uint64(len(headers)) 1126 } 1127 // If we're still skeleton filling fast sync, check pivot staleness 1128 // before continuing to the next skeleton filling 1129 if skeleton && pivot > 0 { 1130 getNextPivot() 1131 } else { 1132 getHeaders(from) 1133 } 1134 1135 case <-timeout.C: 1136 if d.dropPeer == nil { 1137 logger.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", p.id) 1138 break 1139 } 1140 // Header retrieval timed out, consider the peer bad and drop 1141 p.logger.Debug("Header request timed out", "elapsed", ttl) 1142 headerTimeoutMeter.Mark(1) 1143 d.dropPeer(p.id) 1144 1145 // Finish the sync gracefully instead of dumping the gathered data though 1146 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stakingInfoWakeCh} { 1147 select { 1148 case ch <- false: 1149 case <-d.cancelCh: 1150 } 1151 } 1152 select { 1153 case d.headerProcCh <- nil: 1154 case <-d.cancelCh: 1155 } 1156 return errBadPeer 1157 } 1158 } 1159 } 1160 1161 // fillHeaderSkeleton concurrently retrieves headers from all our available peers 1162 // and maps them to the provided skeleton header chain. 1163 // 1164 // Any partial results from the beginning of the skeleton is (if possible) forwarded 1165 // immediately to the header processor to keep the rest of the pipeline full even 1166 // in the case of header stalls. 1167 // 1168 // The method returns the entire filled skeleton and also the number of headers 1169 // already forwarded for processing. 
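// For orientation, the skeleton geometry implied by the constants at the top of this file:
// fetchHeaders asks the origin peer for up to MaxSkeletonSize (128) headers starting at
// from+MaxHeaderFetch-1 and spaced MaxHeaderFetch (192) apart, so each skeleton entry is
// the last header of a 192-header gap and a single skeleton round covers up to
// 128*192 = 24,576 headers. Filling a gap therefore means fetching the 192 headers that
// lead up to a skeleton entry, and deliveries are only accepted if they connect cleanly
// to that entry.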
1170 func (d *Downloader) fillHeaderSkeleton(from uint64, skeleton []*types.Header) ([]*types.Header, int, error) { 1171 logger.Debug("Filling up skeleton", "from", from) 1172 d.queue.ScheduleSkeleton(from, skeleton) 1173 1174 var ( 1175 deliver = func(packet dataPack) (int, error) { 1176 pack := packet.(*headerPack) 1177 return d.queue.DeliverHeaders(pack.peerId, pack.headers, d.headerProcCh) 1178 } 1179 expire = func() map[string]int { return d.queue.ExpireHeaders(d.requestTTL()) } 1180 reserve = func(p *peerConnection, count int) (*fetchRequest, bool, bool) { 1181 return d.queue.ReserveHeaders(p, count), false, false 1182 } 1183 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchHeaders(req.From, MaxHeaderFetch) } 1184 capacity = func(p *peerConnection) int { return p.HeaderCapacity(d.requestRTT()) } 1185 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { 1186 p.SetHeadersIdle(accepted, deliveryTime) 1187 } 1188 ) 1189 err := d.fetchParts(d.headerCh, deliver, d.queue.headerContCh, expire, 1190 d.queue.PendingHeaders, d.queue.InFlightHeaders, reserve, 1191 nil, fetch, d.queue.CancelHeaders, capacity, d.peers.HeaderIdlePeers, setIdle, "headers") 1192 1193 logger.Debug("Skeleton fill terminated", "err", err) 1194 1195 filled, proced := d.queue.RetrieveHeaders() 1196 return filled, proced, err 1197 } 1198 1199 // fetchBodies iteratively downloads the scheduled block bodies, taking any 1200 // available peers, reserving a chunk of blocks for each, waiting for delivery 1201 // and also periodically checking for timeouts. 1202 func (d *Downloader) fetchBodies(from uint64) error { 1203 logger.Debug("Downloading block bodies", "origin", from) 1204 1205 start := time.Now() 1206 var ( 1207 deliver = func(packet dataPack) (int, error) { 1208 pack := packet.(*bodyPack) 1209 return d.queue.DeliverBodies(pack.peerId, pack.transactions) 1210 } 1211 expire = func() map[string]int { return d.queue.ExpireBodies(d.requestTTL()) } 1212 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchBodies(req) } 1213 capacity = func(p *peerConnection) int { return p.BlockCapacity(d.requestRTT()) } 1214 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { 1215 p.SetBodiesIdle(accepted, deliveryTime) 1216 } 1217 ) 1218 err := d.fetchParts(d.bodyCh, deliver, d.bodyWakeCh, expire, 1219 d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ReserveBodies, 1220 d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, d.peers.BodyIdlePeers, setIdle, "bodies") 1221 1222 logger.Debug("Block body download terminated", "err", err, "elapsed", time.Since(start)) 1223 return err 1224 } 1225 1226 // fetchReceipts iteratively downloads the scheduled block receipts, taking any 1227 // available peers, reserving a chunk of receipts for each, waiting for delivery 1228 // and also periodically checking for timeouts. 
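// Like fetchBodies above, this function and fetchStakingInfos below are thin wrappers
// around fetchParts: each supplies its own deliver/expire/reserve/fetch/capacity/setIdle
// callbacks bound to the matching queue and peer-set methods, while the scheduling,
// throttling and timeout logic is shared.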
1229 func (d *Downloader) fetchReceipts(from uint64) error { 1230 logger.Debug("Downloading transaction receipts", "origin", from) 1231 1232 start := time.Now() 1233 var ( 1234 deliver = func(packet dataPack) (int, error) { 1235 pack := packet.(*receiptPack) 1236 return d.queue.DeliverReceipts(pack.peerId, pack.receipts) 1237 } 1238 expire = func() map[string]int { return d.queue.ExpireReceipts(d.requestTTL()) } 1239 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchReceipts(req) } 1240 capacity = func(p *peerConnection) int { return p.ReceiptCapacity(d.requestRTT()) } 1241 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { 1242 p.SetReceiptsIdle(accepted, deliveryTime) 1243 } 1244 ) 1245 err := d.fetchParts(d.receiptCh, deliver, d.receiptWakeCh, expire, 1246 d.queue.PendingReceipts, d.queue.InFlightReceipts, d.queue.ReserveReceipts, 1247 d.receiptFetchHook, fetch, d.queue.CancelReceipts, capacity, d.peers.ReceiptIdlePeers, setIdle, "receipts") 1248 1249 logger.Debug("Transaction receipt download terminated", "err", err, "elapsed", time.Since(start)) 1250 return err 1251 } 1252 1253 // fetchStakingInfos iteratively downloads the scheduled staking information, taking any 1254 // available peers, reserving a chunk of staking information for each, waiting for delivery 1255 // and also periodically checking for timeouts. 1256 func (d *Downloader) fetchStakingInfos(from uint64) error { 1257 logger.Debug("Downloading staking information", "origin", from) 1258 1259 start := time.Now() 1260 var ( 1261 deliver = func(packet dataPack) (int, error) { 1262 pack := packet.(*stakingInfoPack) 1263 return d.queue.DeliverStakingInfos(pack.peerId, pack.stakingInfos) 1264 } 1265 expire = func() map[string]int { return d.queue.ExpireStakingInfos(d.requestTTL()) } 1266 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchStakingInfo(req) } 1267 capacity = func(p *peerConnection) int { return p.StakingInfoCapacity(d.requestRTT()) } 1268 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { 1269 p.SetStakingInfoIdle(accepted, deliveryTime) 1270 } 1271 ) 1272 err := d.fetchParts(d.stakingInfoCh, deliver, d.stakingInfoWakeCh, expire, 1273 d.queue.PendingStakingInfos, d.queue.InFlightStakingInfos, d.queue.ReserveStakingInfos, 1274 d.stakingInfoFetchHook, fetch, d.queue.CancelStakingInfo, capacity, d.peers.StakingInfoIdlePeers, setIdle, "stakingInfos") 1275 1276 logger.Debug("Staking information download terminated", "err", err, "elapsed", time.Since(start)) 1277 return err 1278 } 1279 1280 // fetchParts iteratively downloads scheduled block parts, taking any available 1281 // peers, reserving a chunk of fetch requests for each, waiting for delivery and 1282 // also periodically checking for timeouts. 1283 // 1284 // As the scheduling/timeout logic mostly is the same for all downloaded data 1285 // types, this method is used by each for data gathering and is instrumented with 1286 // various callbacks to handle the slight differences between processing them. 
1287 // 1288 // The instrumentation parameters: 1289 // - errCancel: error type to return if the fetch operation is cancelled (mostly makes logging nicer) 1290 // - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) 1291 // - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) 1292 // - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) 1293 // - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) 1294 // - pending: task callback for the number of requests still needing download (detect completion/non-completability) 1295 // - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) 1296 // - throttle: task callback to check if the processing queue is full and activate throttling (bound memory use) 1297 // - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) 1298 // - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) 1299 // - fetch: network callback to actually send a particular download request to a physical remote peer 1300 // - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) 1301 // - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) 1302 // - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks 1303 // - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) 1304 // - kind: textual label of the type being downloaded to display in log mesages 1305 func (d *Downloader) fetchParts(deliveryCh chan dataPack, deliver func(dataPack) (int, error), wakeCh chan bool, 1306 expire func() map[string]int, pending func() int, inFlight func() bool, reserve func(*peerConnection, int) (*fetchRequest, bool, bool), 1307 fetchHook func([]*types.Header), fetch func(*peerConnection, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peerConnection) int, 1308 idlePeers func() ([]*peerConnection, int), setIdle func(*peerConnection, int, time.Time), kind string, 1309 ) error { 1310 // Create a ticker to detect expired retrieval tasks 1311 ticker := time.NewTicker(100 * time.Millisecond) 1312 defer ticker.Stop() 1313 1314 update := make(chan struct{}, 1) 1315 1316 // Prepare the queue and fetch block parts until the block header fetcher's done 1317 finished := false 1318 for { 1319 select { 1320 case <-d.cancelCh: 1321 return errCanceled 1322 1323 case packet := <-deliveryCh: 1324 deliveryTime := time.Now() 1325 // If the peer was previously banned and failed to deliver its pack 1326 // in a reasonable time frame, ignore its message. 1327 if peer := d.peers.Peer(packet.PeerId()); peer != nil { 1328 // Deliver the received chunk of data and check chain validity 1329 accepted, err := deliver(packet) 1330 if errors.Is(err, errInvalidChain) { 1331 return err 1332 } 1333 // Unless a peer delivered something completely else than requested (usually 1334 // caused by a timed out request which came through in the end), set it to 1335 // idle. If the delivery's stale, the peer should have already been idled. 
1336 if !errors.Is(err, errStaleDelivery) { 1337 setIdle(peer, accepted, deliveryTime) 1338 } 1339 // Issue a log to the user to see what's going on 1340 switch { 1341 case err == nil && packet.Items() == 0: 1342 peer.logger.Trace("Requested data not delivered", "type", kind) 1343 case err == nil: 1344 peer.logger.Trace("Delivered new batch of data", "type", kind, "count", packet.Stats()) 1345 default: 1346 peer.logger.Trace("Failed to deliver retrieved data", "type", kind, "err", err) 1347 } 1348 } 1349 // Blocks assembled, try to update the progress 1350 select { 1351 case update <- struct{}{}: 1352 default: 1353 } 1354 1355 case cont := <-wakeCh: 1356 // The header fetcher sent a continuation flag, check if it's done 1357 if !cont { 1358 finished = true 1359 } 1360 // Headers arrive, try to update the progress 1361 select { 1362 case update <- struct{}{}: 1363 default: 1364 } 1365 1366 case <-ticker.C: 1367 // Sanity check update the progress 1368 select { 1369 case update <- struct{}{}: 1370 default: 1371 } 1372 1373 case <-update: 1374 // Short circuit if we lost all our peers 1375 if d.peers.Len() == 0 { 1376 return errNoPeers 1377 } 1378 // Check for fetch request timeouts and demote the responsible peers 1379 for pid, fails := range expire() { 1380 if peer := d.peers.Peer(pid); peer != nil { 1381 // If a lot of retrieval elements expired, we might have overestimated the remote peer or perhaps 1382 // ourselves. Only reset to minimal throughput but don't drop just yet. If even the minimal times 1383 // out that sync wise we need to get rid of the peer. 1384 // 1385 // The reason the minimum threshold is 2 is because the downloader tries to estimate the bandwidth 1386 // and latency of a peer separately, which requires pushing the measures capacity a bit and seeing 1387 // how response times reacts, to it always requests one more than the minimum (i.e. min 2). 1388 if fails > 2 { 1389 peer.logger.Trace("Data delivery timed out", "type", kind) 1390 setIdle(peer, 0, time.Now()) 1391 } else { 1392 peer.logger.Debug("Stalling delivery, dropping", "type", kind) 1393 1394 if d.dropPeer == nil { 1395 logger.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", pid) 1396 } else { 1397 d.dropPeer(pid) 1398 1399 // If this peer was the master peer, abort sync immediately 1400 d.cancelLock.RLock() 1401 master := pid == d.cancelPeer 1402 d.cancelLock.RUnlock() 1403 1404 if master { 1405 d.cancel() 1406 return errTimeout 1407 } 1408 } 1409 } 1410 } 1411 } 1412 // If there's nothing more to fetch, wait or terminate 1413 if pending() == 0 { 1414 if !inFlight() && finished { 1415 logger.Debug("Data fetching completed", "type", kind) 1416 return nil 1417 } 1418 break 1419 } 1420 // Send a download request to all idle peers, until throttled 1421 progressed, throttled, running := false, false, inFlight() 1422 1423 // numTotalPeers means the number of peers satisfying the protocol requirement. 1424 idlePeers, numTotalPeers := idlePeers() 1425 pendCount := pending() 1426 1427 for _, peer := range idlePeers { 1428 // Short circuit if throttling activated 1429 if throttled { 1430 break 1431 } 1432 // Short circuit if there is no more available task. 1433 if pendCount = pending(); pendCount == 0 { 1434 break 1435 } 1436 // Reserve a chunk of fetches for a peer. A nil can mean either that 1437 // no more headers are available, or that the peer is known not to 1438 // have them. 
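// The reserve callback returns a triple: the request to dispatch (possibly nil, per the
// comment above), a progress flag which broadly means some scheduled items could be
// completed without any network fetch and advanced the result cache, and a throttle flag
// which means the result cache is full, stopping further assignments in this round.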
1439 request, progress, throttle := reserve(peer, capacity(peer)) 1440 if progress { 1441 progressed = true 1442 } 1443 if throttle { 1444 throttled = true 1445 throttleCounter.Inc(1) 1446 } 1447 if request == nil { 1448 continue 1449 } 1450 if request.From > 0 { 1451 peer.logger.Trace("Requesting new batch of data", "type", kind, "from", request.From) 1452 } else { 1453 peer.logger.Trace("Requesting new batch of data", "type", kind, "count", len(request.Headers), "from", request.Headers[0].Number) 1454 } 1455 // Fetch the chunk and make sure any errors return the hashes to the queue 1456 if fetchHook != nil { 1457 fetchHook(request.Headers) 1458 } 1459 if err := fetch(peer, request); err != nil { 1460 // Although we could try and make an attempt to fix this, this error really 1461 // means that we've double allocated a fetch task to a peer. If that is the 1462 // case, the internal state of the downloader and the queue is very wrong so 1463 // better hard crash and note the error instead of silently accumulating into 1464 // a much bigger issue. 1465 panic(fmt.Sprintf("%v: %s fetch assignment failed", peer, kind)) 1466 } 1467 running = true 1468 } 1469 // Make sure that we have peers available for fetching. If all peers have been tried 1470 // and all failed throw an error 1471 if !progressed && !throttled && !running && len(idlePeers) == numTotalPeers && pendCount > 0 { 1472 return errPeersUnavailable 1473 } 1474 } 1475 } 1476 } 1477 1478 // processHeaders takes batches of retrieved headers from an input channel and 1479 // keeps processing and scheduling them into the header chain and downloader's 1480 // queue until the stream ends or a failure occurs. 1481 func (d *Downloader) processHeaders(origin uint64, td *big.Int) error { 1482 logger.Debug("Processing headers", "origin", origin, "td", td) 1483 defer func(start time.Time) { 1484 logger.Debug("Processing headers terminated", "origin", origin, "td", td, "elapsed", time.Since(start)) 1485 }(time.Now()) 1486 // Keep a count of uncertain headers to roll back 1487 var ( 1488 rollback []*types.Header 1489 rollbackErr error 1490 mode = d.getMode() 1491 ) 1492 defer func() { 1493 if len(rollback) > 0 { 1494 // Flatten the headers and roll them back 1495 hashes := make([]common.Hash, len(rollback)) 1496 for i, header := range rollback { 1497 hashes[i] = header.Hash() 1498 } 1499 lastHeader, lastFastBlock, lastBlock := d.lightchain.CurrentHeader().Number, common.Big0, common.Big0 1500 if mode != LightSync { 1501 lastFastBlock = d.blockchain.CurrentFastBlock().Number() 1502 lastBlock = d.blockchain.CurrentBlock().Number() 1503 } 1504 d.lightchain.Rollback(hashes) 1505 curFastBlock, curBlock := common.Big0, common.Big0 1506 if mode != LightSync { 1507 curFastBlock = d.blockchain.CurrentFastBlock().Number() 1508 curBlock = d.blockchain.CurrentBlock().Number() 1509 } 1510 logger.Info("Rolled back headers", "count", len(hashes), 1511 "header", fmt.Sprintf("%d->%d", lastHeader, d.lightchain.CurrentHeader().Number), 1512 "fast", fmt.Sprintf("%d->%d", lastFastBlock, curFastBlock), 1513 "block", fmt.Sprintf("%d->%d", lastBlock, curBlock), "reason", rollbackErr) 1514 } 1515 }() 1516 1517 // Wait for batches of headers to process 1518 gotHeaders := false 1519 1520 for { 1521 select { 1522 case <-d.cancelCh: 1523 rollbackErr = errCanceled 1524 return errCanceled 1525 1526 case headers := <-d.headerProcCh: 1527 // Terminate header processing if we synced up 1528 if len(headers) == 0 { 1529 // Notify everyone that headers are fully processed 1530 for _, 
ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stakingInfoWakeCh} { 1531 select { 1532 case ch <- false: 1533 case <-d.cancelCh: 1534 } 1535 } 1536 // If no headers were retrieved at all, the peer violated its TD promise that it had a 1537 // better chain compared to ours. The only exception is if its promised blocks were 1538 // already imported by other means (e.g. the fetcher): 1539 // 1540 // R <remote peer>, L <local node>: Both at block 10 1541 // R: Mine block 11, and propagate it to L 1542 // L: Queue block 11 for import 1543 // L: Notice that R's head and TD increased compared to ours, start sync 1544 // L: Import of block 11 finishes 1545 // L: Sync begins, and finds common ancestor at 11 1546 // L: Request new headers up from 11 (R's TD was higher, it must have something) 1547 // R: Nothing to give 1548 if mode != LightSync { 1549 head := d.blockchain.CurrentBlock() 1550 if !gotHeaders && td.Cmp(d.blockchain.GetTd(head.Hash(), head.NumberU64())) > 0 { 1551 return errStallingPeer 1552 } 1553 } 1554 // If snap, fast or light syncing, ensure promised headers are indeed delivered. This is 1555 // needed to detect scenarios where an attacker feeds a bad pivot and then bails out 1556 // of delivering the post-pivot blocks that would flag the invalid content. 1557 // 1558 // This check cannot be executed "as is" for full imports, since blocks may still be 1559 // queued for processing when the header download completes. However, as long as the 1560 // peer gave us something useful, we're already happy/progressed (above check). 1561 if mode == SnapSync || mode == FastSync || mode == LightSync { 1562 head := d.lightchain.CurrentHeader() 1563 if td.Cmp(d.lightchain.GetTd(head.Hash(), head.Number.Uint64())) > 0 { 1564 return errStallingPeer 1565 } 1566 } 1567 // Disable any rollback and return 1568 rollback = nil 1569 return nil 1570 } 1571 // Otherwise split the chunk of headers into batches and process them 1572 gotHeaders = true 1573 1574 for len(headers) > 0 { 1575 // Terminate if something failed in between processing chunks 1576 select { 1577 case <-d.cancelCh: 1578 rollbackErr = errCanceled 1579 return errCanceled 1580 default: 1581 } 1582 // Select the next chunk of headers to import 1583 limit := maxHeadersProcess 1584 if limit > len(headers) { 1585 limit = len(headers) 1586 } 1587 chunk := headers[:limit] 1588 1589 // In case of header-only syncing, validate the chunk immediately 1590 if mode == SnapSync || mode == FastSync || mode == LightSync { 1591 // Collect the yet unknown headers to mark them as uncertain 1592 unknown := make([]*types.Header, 0, len(headers)) 1593 for _, header := range chunk { 1594 if !d.lightchain.HasHeader(header.Hash(), header.Number.Uint64()) { 1595 unknown = append(unknown, header) 1596 } 1597 } 1598 // If we're importing pure headers, verify based on their recentness 1599 var pivot uint64 1600 1601 d.pivotLock.RLock() 1602 if d.pivotHeader != nil { 1603 pivot = d.pivotHeader.Number.Uint64() 1604 } 1605 d.pivotLock.RUnlock() 1606 frequency := fsHeaderCheckFrequency 1607 if chunk[len(chunk)-1].Number.Uint64()+uint64(fsHeaderForceVerify) > pivot { 1608 frequency = 1 1609 } 1610 if n, err := d.lightchain.InsertHeaderChain(chunk, frequency); err != nil { 1611 rollbackErr = err 1612 // If some headers were inserted, add them too to the rollback list 1613 if n > 0 { 1614 rollback = append(rollback, chunk[:n]...)
} 1616 logger.Debug("Invalid header encountered", "number", chunk[n].Number, "hash", chunk[n].Hash(), "parent", chunk[n].ParentHash, "err", err) 1617 return fmt.Errorf("%w: %v", errInvalidChain, err) 1618 } 1619 // All verifications passed, store newly found uncertain headers 1620 rollback = append(rollback, unknown...) 1621 if len(rollback) > fsHeaderSafetyNet { 1622 rollback = append(rollback[:0], rollback[len(rollback)-fsHeaderSafetyNet:]...) 1623 } 1624 } 1625 // Unless we're doing light chains, schedule the headers for associated content retrieval 1626 if mode == FullSync || mode == SnapSync || mode == FastSync { 1627 // If we've reached the allowed number of pending headers, stall a bit 1628 for d.queue.PendingBlocks() >= maxQueuedHeaders || d.queue.PendingReceipts() >= maxQueuedHeaders || 1629 d.queue.PendingStakingInfos() >= maxQueuedHeaders { 1630 select { 1631 case <-d.cancelCh: 1632 rollbackErr = errCanceled 1633 return errCanceled 1634 case <-time.After(time.Second): 1635 } 1636 } 1637 // Otherwise insert the headers for content retrieval 1638 inserts := d.queue.Schedule(chunk, origin) 1639 if len(inserts) != len(chunk) { 1640 rollbackErr = fmt.Errorf("stale headers: len(inserts) - %v, len(chunk) - %v", len(inserts), len(chunk)) 1641 return errBadPeer 1642 } 1643 } 1644 headers = headers[limit:] 1645 origin += uint64(limit) 1646 } 1647 1648 // Update the highest block number we know if a higher one is found. 1649 d.syncStatsLock.Lock() 1650 if d.syncStatsChainHeight < origin { 1651 d.syncStatsChainHeight = origin - 1 1652 } 1653 d.syncStatsLock.Unlock() 1654 1655 // Signal the content downloaders of the availability of new tasks 1656 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stakingInfoWakeCh} { 1657 select { 1658 case ch <- true: 1659 default: 1660 } 1661 } 1662 } 1663 } 1664 } 1665 1666 // processFullSyncContent takes fetch results from the queue and imports them into the chain.
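// It blocks on the queue until a batch of results becomes available, runs the
// optional chainInsertHook, and keeps importing batches until the queue drains,
// at which point it returns nil.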
1667 func (d *Downloader) processFullSyncContent() error { 1668 logger.Debug("Processing full sync content") 1669 defer func(start time.Time) { 1670 logger.Debug("Processing full sync content terminated", "elapsed", time.Since(start)) 1671 }(time.Now()) 1672 for { 1673 results := d.queue.Results(true) 1674 if len(results) == 0 { 1675 return nil 1676 } 1677 if d.chainInsertHook != nil { 1678 d.chainInsertHook(results) 1679 } 1680 if err := d.importBlockResults(results); err != nil { 1681 return err 1682 } 1683 } 1684 } 1685 1686 func (d *Downloader) importBlockResults(results []*fetchResult) error { 1687 // Check for any early termination requests 1688 if len(results) == 0 { 1689 return nil 1690 } 1691 select { 1692 case <-d.quitCh: 1693 return errCancelContentProcessing 1694 default: 1695 } 1696 // Retrieve a batch of results to import 1697 first, last := results[0].Header, results[len(results)-1].Header 1698 logger.Debug("Inserting downloaded chain", "items", len(results), 1699 "firstnum", first.Number, "firsthash", first.Hash(), 1700 "lastnum", last.Number, "lasthash", last.Hash(), 1701 ) 1702 blocks := make([]*types.Block, len(results)) 1703 for i, result := range results { 1704 blocks[i] = types.NewBlockWithHeader(result.Header).WithBody(result.Transactions) 1705 } 1706 1707 for _, block := range blocks { 1708 select { 1709 case <-d.cancelCh: 1710 return errCanceled 1711 default: 1712 if _, err := d.blockchain.InsertChain(types.Blocks{block}); err != nil { 1713 logger.Debug("Downloaded item processing failed", "number", block.Number(), "hash", block.Hash(), "err", err) 1714 return fmt.Errorf("%w: %v", errInvalidChain, err) 1715 } 1716 } 1717 } 1718 return nil 1719 } 1720 1721 // processFastSyncContent takes fetch results from the queue and writes them to the 1722 // database. It also controls the synchronisation of state nodes of the pivot block. 1723 func (d *Downloader) processFastSyncContent() error { 1724 logger.Debug("Processing fast sync content") 1725 defer func(start time.Time) { 1726 logger.Debug("Processing fast sync content terminated", "elapsed", time.Since(start)) 1727 }(time.Now()) 1728 // Start syncing state of the reported head block. This should get us most of 1729 // the state of the pivot block. 1730 d.pivotLock.RLock() 1731 sync := d.syncState(d.pivotHeader.Root) 1732 d.pivotLock.RUnlock() 1733 defer func() { 1734 // The `sync` object is replaced every time the pivot moves. We need to 1735 // defer-close the very last active one, hence the lazy evaluation here 1736 // instead of a direct `defer sync.Cancel()`. 1737 sync.Cancel() 1738 }() 1739 closeOnErr := func(s *stateSync) { 1740 if err := s.Wait(); err != nil && err != errCancelStateFetch && err != errCanceled && err != snap.ErrCancelled { 1741 d.queue.Close() // wake up WaitResults 1742 } 1743 } 1744 go closeOnErr(sync) 1745 // To cater for moving pivot points, track the pivot block and subsequently 1746 // accumulated download results separately.
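// oldPivot holds the pivot result whose state sync has not completed yet, and
// oldTail accumulates the post-pivot results that are re-prepended to the next
// batch so they are not lost while waiting for the pivot to commit.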
1747 var ( 1748 oldPivot *fetchResult // Locked in pivot block, might change eventually 1749 oldTail []*fetchResult // Downloaded content after the pivot 1750 ) 1751 for { 1752 // Wait for the next batch of downloaded data to be available, and if the pivot 1753 // block became stale, move the goalpost 1754 results := d.queue.Results(oldPivot == nil) // Block if we're not monitoring pivot staleness 1755 if len(results) == 0 { 1756 // If pivot sync is done, stop 1757 if oldPivot == nil { 1758 return sync.Cancel() 1759 } 1760 // If sync failed, stop 1761 select { 1762 case <-d.cancelCh: 1763 sync.Cancel() 1764 return errCanceled 1765 default: 1766 } 1767 } 1768 if d.chainInsertHook != nil { 1769 d.chainInsertHook(results) 1770 } 1771 // If we haven't downloaded the pivot block yet, check pivot staleness 1772 // notifications from the header downloader 1773 d.pivotLock.RLock() 1774 pivot := d.pivotHeader 1775 d.pivotLock.RUnlock() 1776 1777 if oldPivot == nil { 1778 if pivot.Root != sync.root { 1779 sync.Cancel() 1780 sync = d.syncState(pivot.Root) 1781 1782 go closeOnErr(sync) 1783 } 1784 } else { 1785 results = append(append([]*fetchResult{oldPivot}, oldTail...), results...) 1786 } 1787 // Split around the pivot block and process the two sides via fast/full sync 1788 if atomic.LoadInt32(&d.committed) == 0 { 1789 latest := results[len(results)-1].Header 1790 // If the height is above the pivot block by 2 sets, it means the pivot 1791 // become stale in the network and it was garbage collected, move to a 1792 // new pivot. 1793 if height := latest.Number.Uint64(); height >= pivot.Number.Uint64()+2*uint64(fsMinFullBlocks) { 1794 logger.Warn("Pivot became stale, moving", "old", pivot.Number.Uint64(), "new", height-uint64(fsMinFullBlocks)) 1795 pivot = results[len(results)-1-fsMinFullBlocks].Header // must exist as lower old pivot is uncommitted 1796 1797 d.pivotLock.Lock() 1798 d.pivotHeader = pivot 1799 d.pivotLock.Unlock() 1800 } 1801 } 1802 P, beforeP, afterP := splitAroundPivot(pivot.Number.Uint64(), results) 1803 if err := d.commitFastSyncData(beforeP, sync); err != nil { 1804 return err 1805 } 1806 if P != nil { 1807 // If new pivot block found, cancel old state retrieval and restart 1808 if oldPivot != P { 1809 sync.Cancel() 1810 1811 sync = d.syncState(P.Header.Root) 1812 go closeOnErr(sync) 1813 oldPivot = P 1814 } 1815 // Wait for completion, occasionally checking for pivot staleness 1816 select { 1817 case <-sync.done: 1818 if sync.err != nil { 1819 return sync.err 1820 } 1821 if err := d.commitPivotBlock(P); err != nil { 1822 return err 1823 } 1824 oldPivot = nil 1825 1826 case <-time.After(time.Second): 1827 oldTail = afterP 1828 continue 1829 } 1830 } 1831 // Fast sync done, pivot commit done, full import 1832 if err := d.importBlockResults(afterP); err != nil { 1833 return err 1834 } 1835 } 1836 } 1837 1838 func splitAroundPivot(pivot uint64, results []*fetchResult) (p *fetchResult, before, after []*fetchResult) { 1839 for _, result := range results { 1840 num := result.Header.Number.Uint64() 1841 switch { 1842 case num < pivot: 1843 before = append(before, result) 1844 case num == pivot: 1845 p = result 1846 default: 1847 after = append(after, result) 1848 } 1849 } 1850 return p, before, after 1851 } 1852 1853 func (d *Downloader) commitFastSyncData(results []*fetchResult, stateSync *stateSync) error { 1854 // Check for any early termination requests 1855 if len(results) == 0 { 1856 return nil 1857 } 1858 select { 1859 case <-d.quitCh: 1860 return errCancelContentProcessing 1861 
case <-stateSync.done: 1862 if err := stateSync.Wait(); err != nil { 1863 return err 1864 } 1865 default: 1866 } 1867 // Retrieve a batch of results to import 1868 first, last := results[0].Header, results[len(results)-1].Header 1869 logger.Debug("Inserting fast-sync blocks", "items", len(results), 1870 "firstnum", first.Number, "firsthash", first.Hash(), 1871 "lastnum", last.Number, "lasthash", last.Hash(), 1872 ) 1873 blocks := make([]*types.Block, len(results)) 1874 receipts := make([]types.Receipts, len(results)) 1875 for i, result := range results { 1876 blocks[i] = types.NewBlockWithHeader(result.Header).WithBody(result.Transactions) 1877 receipts[i] = result.Receipts 1878 if result.StakingInfo != nil { 1879 if err := reward.AddStakingInfoToDB(result.StakingInfo); err != nil { 1880 logger.Error("Failed to insert downloaded staking info", "err", err) 1881 return fmt.Errorf("failed to insert the downloaded staking information: %v", err) 1882 } else { 1883 logger.Info("Imported new staking information", "number", result.StakingInfo.BlockNum) 1884 } 1885 } 1886 } 1887 if index, err := d.blockchain.InsertReceiptChain(blocks, receipts); err != nil { 1888 logger.Debug("Downloaded item processing failed", "number", results[index].Header.Number, "hash", results[index].Header.Hash(), "err", err) 1889 return fmt.Errorf("%w: %v", errInvalidChain, err) 1890 } 1891 return nil 1892 } 1893 1894 func (d *Downloader) commitPivotBlock(result *fetchResult) error { 1895 block := types.NewBlockWithHeader(result.Header).WithBody(result.Transactions) 1896 logger.Debug("Committing fast sync pivot as new head", "number", block.Number(), "hash", block.Hash()) 1897 if result.StakingInfo != nil { 1898 if err := reward.AddStakingInfoToDB(result.StakingInfo); err != nil { 1899 logger.Error("Failed to insert downloaded staking info on pivot block", "err", err, "pivot", block.Number()) 1900 return fmt.Errorf("failed to insert the downloaded staking information on pivot block (%v): %v", block.Number(), err) 1901 } else { 1902 logger.Info("Imported new staking information on pivot block", "number", result.StakingInfo.BlockNum, "pivot", block.Number()) 1903 } 1904 } 1905 if _, err := d.blockchain.InsertReceiptChain([]*types.Block{block}, []types.Receipts{result.Receipts}); err != nil { 1906 return err 1907 } 1908 if err := d.blockchain.FastSyncCommitHead(block.Hash()); err != nil { 1909 return err 1910 } 1911 atomic.StoreInt32(&d.committed, 1) 1912 1913 // If we had a bloom filter for the state sync, deallocate it now. Note, we only 1914 // deallocate internally, but keep the empty wrapper. This ensures that if we do 1915 // a rollback after committing the pivot and restarting fast sync, we don't end 1916 // up using a nil bloom. Empty bloom is fine, it just returns that it does not 1917 // have the info we need, so reach down to the database instead. 1918 if d.stateBloom != nil { 1919 d.stateBloom.Close() 1920 } 1921 return nil 1922 } 1923 1924 // DeliverHeaders injects a new batch of block headers received from a remote 1925 // node into the download schedule. 1926 func (d *Downloader) DeliverHeaders(id string, headers []*types.Header) (err error) { 1927 return d.deliver(id, d.headerCh, &headerPack{id, headers}, headerInMeter, headerDropMeter) 1928 } 1929 1930 // DeliverBodies injects a new batch of block bodies received from a remote node.
1931 func (d *Downloader) DeliverBodies(id string, transactions [][]*types.Transaction) (err error) { 1932 return d.deliver(id, d.bodyCh, &bodyPack{id, transactions}, bodyInMeter, bodyDropMeter) 1933 } 1934 1935 // DeliverReceipts injects a new batch of receipts received from a remote node. 1936 func (d *Downloader) DeliverReceipts(id string, receipts [][]*types.Receipt) (err error) { 1937 return d.deliver(id, d.receiptCh, &receiptPack{id, receipts}, receiptInMeter, receiptDropMeter) 1938 } 1939 1940 // DeliverStakingInfos injects a new batch of staking information received from a remote node. 1941 func (d *Downloader) DeliverStakingInfos(id string, stakingInfos []*reward.StakingInfo) error { 1942 if d.isStakingInfoRecovery { 1943 d.stakingInfoRecoveryCh <- stakingInfos 1944 } 1945 return d.deliver(id, d.stakingInfoCh, &stakingInfoPack{id, stakingInfos}, stakingInfoInMeter, stakingInfoDropMeter) 1946 } 1947 1948 // DeliverNodeData injects a new batch of node state data received from a remote node. 1949 func (d *Downloader) DeliverNodeData(id string, data [][]byte) (err error) { 1950 return d.deliver(id, d.stateCh, &statePack{id, data}, stateInMeter, stateDropMeter) 1951 } 1952 1953 // DeliverSnapPacket is invoked from a peer's message handler when it transmits a 1954 // data packet for the local node to consume. 1955 func (d *Downloader) DeliverSnapPacket(peer *snap.Peer, packet snap.Packet) error { 1956 switch packet := packet.(type) { 1957 case *snap.AccountRangePacket: 1958 hashes, accounts := packet.Unpack() 1959 return d.SnapSyncer.OnAccounts(peer, packet.ID, hashes, accounts, packet.Proof) 1960 1961 case *snap.StorageRangesPacket: 1962 hashset, slotset := packet.Unpack() 1963 return d.SnapSyncer.OnStorage(peer, packet.ID, hashset, slotset, packet.Proof) 1964 1965 case *snap.ByteCodesPacket: 1966 return d.SnapSyncer.OnByteCodes(peer, packet.ID, packet.Codes) 1967 1968 case *snap.TrieNodesPacket: 1969 return d.SnapSyncer.OnTrieNodes(peer, packet.ID, packet.Nodes) 1970 1971 default: 1972 return fmt.Errorf("unexpected snap packet type: %T", packet) 1973 } 1974 } 1975 1976 // deliver injects a new batch of data received from a remote node. 1977 func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { 1978 // Update the delivery metrics for both good and failed deliveries 1979 inMeter.Mark(int64(packet.Items())) 1980 defer func() { 1981 if err != nil { 1982 dropMeter.Mark(int64(packet.Items())) 1983 } 1984 }() 1985 // Deliver or abort if the sync is canceled while queuing 1986 d.cancelLock.RLock() 1987 cancel := d.cancelCh 1988 d.cancelLock.RUnlock() 1989 if cancel == nil { 1990 return errNoSyncActive 1991 } 1992 select { 1993 case destCh <- packet: 1994 return nil 1995 case <-cancel: 1996 return errNoSyncActive 1997 } 1998 } 1999 2000 // qosTuner is the quality of service tuning loop that occasionally gathers the 2001 // peer latency statistics and updates the estimated request round trip time. 
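// Each cycle folds the freshly sampled median RTT into an exponential moving
// average, rtt = (1-qosTuningImpact)*rtt + qosTuningImpact*medianRTT, and moves
// the confidence value (stored in millionths) halfway towards 1.0.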
2002 func (d *Downloader) qosTuner() { 2003 for { 2004 // Retrieve the current median RTT and integrate into the previous target RTT 2005 rtt := time.Duration((1-qosTuningImpact)*float64(atomic.LoadUint64(&d.rttEstimate)) + qosTuningImpact*float64(d.peers.medianRTT())) 2006 atomic.StoreUint64(&d.rttEstimate, uint64(rtt)) 2007 2008 // A new RTT cycle passed, increase our confidence in the estimated RTT 2009 conf := atomic.LoadUint64(&d.rttConfidence) 2010 conf = conf + (1000000-conf)/2 2011 atomic.StoreUint64(&d.rttConfidence, conf) 2012 2013 // Log the new QoS values and sleep until the next RTT 2014 logger.Debug("Recalculated downloader QoS values", "rtt", rtt, "confidence", float64(conf)/1000000.0, "ttl", d.requestTTL()) 2015 select { 2016 case <-d.quitCh: 2017 return 2018 case <-time.After(rtt): 2019 } 2020 } 2021 } 2022 2023 // qosReduceConfidence is meant to be called when a new peer joins the downloader's 2024 // peer set, needing to reduce the confidence we have in our QoS estimates. 2025 func (d *Downloader) qosReduceConfidence() { 2026 // If we have a single peer, confidence is always 1 2027 peers := uint64(d.peers.Len()) 2028 if peers == 0 { 2029 // Ensure peer connectivity races don't catch us off guard 2030 return 2031 } 2032 if peers == 1 { 2033 atomic.StoreUint64(&d.rttConfidence, 1000000) 2034 return 2035 } 2036 // If we have a ton of peers, don't drop confidence 2037 if peers >= uint64(qosConfidenceCap) { 2038 return 2039 } 2040 // Otherwise drop the confidence factor 2041 conf := atomic.LoadUint64(&d.rttConfidence) * (peers - 1) / peers 2042 if float64(conf)/1000000 < rttMinConfidence { 2043 conf = uint64(rttMinConfidence * 1000000) 2044 } 2045 atomic.StoreUint64(&d.rttConfidence, conf) 2046 2047 rtt := time.Duration(atomic.LoadUint64(&d.rttEstimate)) 2048 logger.Debug("Relaxed downloader QoS values", "rtt", rtt, "confidence", float64(conf)/1000000.0, "ttl", d.requestTTL()) 2049 } 2050 2051 // requestRTT returns the current target round trip time for a download request 2052 // to complete in. 2053 // 2054 // Note, the returned RTT is 0.9 of the actual estimated RTT. The reason is that 2055 // the downloader tries to adapt queries to the RTT, so multiple RTT values can 2056 // be adapted to, but smaller ones are preferred (stabler download stream). 2057 func (d *Downloader) requestRTT() time.Duration { 2058 return time.Duration(atomic.LoadUint64(&d.rttEstimate)) * 9 / 10 2059 } 2060 2061 // requestTTL returns the current timeout allowance for a single download request 2062 // to finish under. 2063 func (d *Downloader) requestTTL() time.Duration { 2064 var ( 2065 rtt = time.Duration(atomic.LoadUint64(&d.rttEstimate)) 2066 conf = float64(atomic.LoadUint64(&d.rttConfidence)) / 1000000.0 2067 ) 2068 ttl := time.Duration(ttlScaling) * time.Duration(float64(rtt)/conf) 2069 if ttl > ttlLimit { 2070 ttl = ttlLimit 2071 } 2072 return ttl 2073 }
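// The helper below is an illustrative sketch only, not part of the upstream
// klaytn downloader: it restates the RTT -> TTL conversion above with explicit
// inputs so the scaling and clamping are easy to follow. It assumes only the
// ttlScaling and ttlLimit values declared at the top of this file; the name
// exampleRequestTTL is hypothetical.
//
// For example, with rtt = 10s and confidence = 0.4 the raw TTL is
// 3 * 10s / 0.4 = 75s, which is then clamped to ttlLimit (one minute).
func exampleRequestTTL(rtt time.Duration, confidence float64) time.Duration {
	// Scale the round-trip time up, then stretch it further when confidence in
	// the estimate is low (confidence is a fraction in (0, 1]).
	ttl := time.Duration(ttlScaling) * time.Duration(float64(rtt)/confidence)
	if ttl > ttlLimit {
		ttl = ttlLimit
	}
	return ttl
}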