// Source: github.com/ylsgit/go-ethereum@v1.6.5/eth/downloader/downloader.go

// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package downloader contains the manual full chain synchronisation.
package downloader

import (
	"crypto/rand"
	"errors"
	"fmt"
	"math"
	"math/big"
	"sync"
	"sync/atomic"
	"time"

	ethereum "github.com/ethereum/go-ethereum"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/event"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/params"
	"github.com/ethereum/go-ethereum/trie"
	"github.com/rcrowley/go-metrics"
)

// Tunable limits and timing parameters of the downloader.
var (
	MaxHashFetch    = 512 // Amount of hashes to be fetched per retrieval request
	MaxBlockFetch   = 128 // Amount of blocks to be fetched per retrieval request
	MaxHeaderFetch  = 192 // Amount of block headers to be fetched per retrieval request
	MaxSkeletonSize = 128 // Number of header fetches to need for a skeleton assembly
	MaxBodyFetch    = 128 // Amount of block bodies to be fetched per retrieval request
	MaxReceiptFetch = 256 // Amount of transaction receipts to allow fetching per request
	MaxStateFetch   = 384 // Amount of node state values to allow fetching per request

	MaxForkAncestry  = 3 * params.EpochDuration // Maximum chain reorganisation
	rttMinEstimate   = 2 * time.Second          // Minimum round-trip time to target for download requests
	rttMaxEstimate   = 20 * time.Second         // Maximum round-trip time to target for download requests
	rttMinConfidence = 0.1                      // Worst confidence factor in our estimated RTT value
	ttlScaling       = 3                        // Constant scaling factor for RTT -> TTL conversion
	ttlLimit         = time.Minute              // Maximum TTL allowance to prevent reaching crazy timeouts

	qosTuningPeers   = 5    // Number of peers to tune based on (best peers)
	qosConfidenceCap = 10   // Number of peers above which not to modify RTT confidence
	qosTuningImpact  = 0.25 // Impact that a new tuning target has on the previous value

	maxQueuedHeaders  = 32 * 1024 // [eth/62] Maximum number of headers to queue for import (DOS protection)
	maxHeadersProcess = 2048      // Number of header download results to import at once into the chain
	maxResultsProcess = 2048      // Number of content download results to import at once into the chain

	fsHeaderCheckFrequency = 100        // Verification frequency of the downloaded headers during fast sync
	fsHeaderSafetyNet      = 2048       // Number of headers to discard in case a chain violation is detected
	fsHeaderForceVerify    = 24         // Number of headers to verify before and after the pivot to accept it
	fsPivotInterval        = 256        // Number of headers out of which to randomize the pivot point
	fsMinFullBlocks        = 64         // Number of blocks to retrieve fully even in fast sync
	fsCriticalTrials       = uint32(32) // Number of times to retry in the critical section before bailing
)

// Sentinel errors returned by the downloader. Callers compare against these
// values directly (see the switch in Synchronise).
var (
	errBusy                    = errors.New("busy")
	errUnknownPeer             = errors.New("peer is unknown or unhealthy")
	errBadPeer                 = errors.New("action from bad peer ignored")
	errStallingPeer            = errors.New("peer is stalling")
	errNoPeers                 = errors.New("no peers to keep download active")
	errTimeout                 = errors.New("timeout")
	errEmptyHeaderSet          = errors.New("empty header set by peer")
	errPeersUnavailable        = errors.New("no peers available or all tried for download")
	errInvalidAncestor         = errors.New("retrieved ancestor is invalid")
	errInvalidChain            = errors.New("retrieved hash chain is invalid")
	errInvalidBlock            = errors.New("retrieved block is invalid")
	errInvalidBody             = errors.New("retrieved block body is invalid")
	errInvalidReceipt          = errors.New("retrieved receipt is invalid")
	errCancelBlockFetch        = errors.New("block download canceled (requested)")
	errCancelHeaderFetch       = errors.New("block header download canceled (requested)")
	errCancelBodyFetch         = errors.New("block body download canceled (requested)")
	errCancelReceiptFetch      = errors.New("receipt download canceled (requested)")
	errCancelStateFetch        = errors.New("state data download canceled (requested)")
	errCancelHeaderProcessing  = errors.New("header processing canceled (requested)")
	errCancelContentProcessing = errors.New("content processing canceled (requested)")
	errNoSyncActive            = errors.New("no sync active")
	errTooOld                  = errors.New("peer doesn't speak recent enough protocol version (need version >= 62)")
)

// Downloader holds the state of the chain synchronisation: the active sync
// mode, the download task queue, the set of peers, the chain access callbacks,
// the channels on which network deliveries arrive, and the cancellation and
// termination plumbing.
type Downloader struct {
	mode SyncMode       // Synchronisation mode defining the strategy used (per sync cycle)
	mux  *event.TypeMux // Event multiplexer to announce sync operation events

	queue *queue   // Scheduler for selecting the hashes to download
	peers *peerSet // Set of active peers from which download can proceed

	fsPivotLock  *types.Header // Pivot header on critical section entry (cannot change between retries)
	fsPivotFails uint32        // Number of subsequent fast sync failures in the critical section

	rttEstimate   uint64 // Round trip time to target for download requests
	rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops)

	// Statistics
	syncStatsChainOrigin uint64       // Origin block number where syncing started at
	syncStatsChainHeight uint64       // Highest block number known when syncing started
	syncStatsStateDone   uint64       // Number of state trie entries already pulled
	syncStatsLock        sync.RWMutex // Lock protecting the sync stats fields

	// Callbacks
	hasHeader        headerCheckFn            // Checks if a header is present in the chain
	hasBlockAndState blockAndStateCheckFn     // Checks if a block and associated state is present in the chain
	getHeader        headerRetrievalFn        // Retrieves a header from the chain
	getBlock         blockRetrievalFn         // Retrieves a block from the chain
	headHeader       headHeaderRetrievalFn    // Retrieves the head header from the chain
	headBlock        headBlockRetrievalFn     // Retrieves the head block from the chain
	headFastBlock    headFastBlockRetrievalFn // Retrieves the head fast-sync block from the chain
	commitHeadBlock  headBlockCommitterFn     // Commits a manually assembled block as the chain head
	getTd            tdRetrievalFn            // Retrieves the TD of a block from the chain
	insertHeaders    headerChainInsertFn      // Injects a batch of headers into the chain
	insertBlocks     blockChainInsertFn       // Injects a batch of blocks into the chain
	insertReceipts   receiptChainInsertFn     // Injects a batch of blocks and their receipts into the chain
	rollback         chainRollbackFn          // Removes a batch of recently added chain links
	dropPeer         peerDropFn               // Drops a peer for misbehaving

	// Status
	synchroniseMock func(id string, hash common.Hash) error // Replacement for synchronise during testing
	synchronising   int32                                   // Atomically flipped 0 -> 1 while a sync cycle is running
	notified        int32                                   // Atomically set to 1 once the sync start was logged

	// Channels
	newPeerCh     chan *peer
	headerCh      chan dataPack        // [eth/62] Channel receiving inbound block headers
	bodyCh        chan dataPack        // [eth/62] Channel receiving inbound block bodies
	receiptCh     chan dataPack        // [eth/63] Channel receiving inbound receipts
	stateCh       chan dataPack        // [eth/63] Channel receiving inbound node state data
	bodyWakeCh    chan bool            // [eth/62] Channel to signal the block body fetcher of new tasks
	receiptWakeCh chan bool            // [eth/63] Channel to signal the receipt fetcher of new tasks
	stateWakeCh   chan bool            // [eth/63] Channel to signal the state fetcher of new tasks
	headerProcCh  chan []*types.Header // [eth/62] Channel to feed the header processor new tasks

	// Cancellation and termination
	cancelPeer string        // Identifier of the peer currently being used as the master (cancel on drop)
	cancelCh   chan struct{} // Channel to cancel mid-flight syncs
	cancelLock sync.RWMutex  // Lock to protect the cancel channel and peer in delivers

	quitCh   chan struct{} // Quit channel to signal termination
	quitLock sync.RWMutex  // Lock to prevent double closes

	// Testing hooks
	syncInitHook     func(uint64, uint64)  // Method to call upon initiating a new sync run
	bodyFetchHook    func([]*types.Header) // Method to call upon starting a block body fetch
	receiptFetchHook func([]*types.Header) // Method to call upon starting a receipt fetch
	chainInsertHook  func([]*fetchResult)  // Method to call upon inserting a chain of blocks (possibly in multiple invocations)
}

// New creates a new downloader to fetch hashes and blocks from remote peers.
165 func New(mode SyncMode, stateDb ethdb.Database, mux *event.TypeMux, hasHeader headerCheckFn, hasBlockAndState blockAndStateCheckFn, 166 getHeader headerRetrievalFn, getBlock blockRetrievalFn, headHeader headHeaderRetrievalFn, headBlock headBlockRetrievalFn, 167 headFastBlock headFastBlockRetrievalFn, commitHeadBlock headBlockCommitterFn, getTd tdRetrievalFn, insertHeaders headerChainInsertFn, 168 insertBlocks blockChainInsertFn, insertReceipts receiptChainInsertFn, rollback chainRollbackFn, dropPeer peerDropFn) *Downloader { 169 170 dl := &Downloader{ 171 mode: mode, 172 mux: mux, 173 queue: newQueue(stateDb), 174 peers: newPeerSet(), 175 rttEstimate: uint64(rttMaxEstimate), 176 rttConfidence: uint64(1000000), 177 hasHeader: hasHeader, 178 hasBlockAndState: hasBlockAndState, 179 getHeader: getHeader, 180 getBlock: getBlock, 181 headHeader: headHeader, 182 headBlock: headBlock, 183 headFastBlock: headFastBlock, 184 commitHeadBlock: commitHeadBlock, 185 getTd: getTd, 186 insertHeaders: insertHeaders, 187 insertBlocks: insertBlocks, 188 insertReceipts: insertReceipts, 189 rollback: rollback, 190 dropPeer: dropPeer, 191 newPeerCh: make(chan *peer, 1), 192 headerCh: make(chan dataPack, 1), 193 bodyCh: make(chan dataPack, 1), 194 receiptCh: make(chan dataPack, 1), 195 stateCh: make(chan dataPack, 1), 196 bodyWakeCh: make(chan bool, 1), 197 receiptWakeCh: make(chan bool, 1), 198 stateWakeCh: make(chan bool, 1), 199 headerProcCh: make(chan []*types.Header, 1), 200 quitCh: make(chan struct{}), 201 } 202 go dl.qosTuner() 203 return dl 204 } 205 206 // Progress retrieves the synchronisation boundaries, specifically the origin 207 // block where synchronisation started at (may have failed/suspended); the block 208 // or header sync is currently at; and the latest known block which the sync targets. 209 // 210 // In addition, during the state download phase of fast synchronisation the number 211 // of processed and the total number of known states are also returned. 
Otherwise 212 // these are zero. 213 func (d *Downloader) Progress() ethereum.SyncProgress { 214 // Fetch the pending state count outside of the lock to prevent unforeseen deadlocks 215 pendingStates := uint64(d.queue.PendingNodeData()) 216 217 // Lock the current stats and return the progress 218 d.syncStatsLock.RLock() 219 defer d.syncStatsLock.RUnlock() 220 221 current := uint64(0) 222 switch d.mode { 223 case FullSync: 224 current = d.headBlock().NumberU64() 225 case FastSync: 226 current = d.headFastBlock().NumberU64() 227 case LightSync: 228 current = d.headHeader().Number.Uint64() 229 } 230 return ethereum.SyncProgress{ 231 StartingBlock: d.syncStatsChainOrigin, 232 CurrentBlock: current, 233 HighestBlock: d.syncStatsChainHeight, 234 PulledStates: d.syncStatsStateDone, 235 KnownStates: d.syncStatsStateDone + pendingStates, 236 } 237 } 238 239 // Synchronising returns whether the downloader is currently retrieving blocks. 240 func (d *Downloader) Synchronising() bool { 241 return atomic.LoadInt32(&d.synchronising) > 0 242 } 243 244 // RegisterPeer injects a new download peer into the set of block source to be 245 // used for fetching hashes and blocks from. 246 func (d *Downloader) RegisterPeer(id string, version int, currentHead currentHeadRetrievalFn, 247 getRelHeaders relativeHeaderFetcherFn, getAbsHeaders absoluteHeaderFetcherFn, getBlockBodies blockBodyFetcherFn, 248 getReceipts receiptFetcherFn, getNodeData stateFetcherFn) error { 249 250 logger := log.New("peer", id) 251 logger.Trace("Registering sync peer") 252 if err := d.peers.Register(newPeer(id, version, currentHead, getRelHeaders, getAbsHeaders, getBlockBodies, getReceipts, getNodeData, logger)); err != nil { 253 logger.Error("Failed to register sync peer", "err", err) 254 return err 255 } 256 d.qosReduceConfidence() 257 258 return nil 259 } 260 261 // UnregisterPeer remove a peer from the known list, preventing any action from 262 // the specified peer. 
An effort is also made to return any pending fetches into 263 // the queue. 264 func (d *Downloader) UnregisterPeer(id string) error { 265 // Unregister the peer from the active peer set and revoke any fetch tasks 266 logger := log.New("peer", id) 267 logger.Trace("Unregistering sync peer") 268 if err := d.peers.Unregister(id); err != nil { 269 logger.Error("Failed to unregister sync peer", "err", err) 270 return err 271 } 272 d.queue.Revoke(id) 273 274 // If this peer was the master peer, abort sync immediately 275 d.cancelLock.RLock() 276 master := id == d.cancelPeer 277 d.cancelLock.RUnlock() 278 279 if master { 280 d.Cancel() 281 } 282 return nil 283 } 284 285 // Synchronise tries to sync up our local block chain with a remote peer, both 286 // adding various sanity checks as well as wrapping it with various log entries. 287 func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int, mode SyncMode) error { 288 err := d.synchronise(id, head, td, mode) 289 switch err { 290 case nil: 291 case errBusy: 292 293 case errTimeout, errBadPeer, errStallingPeer, 294 errEmptyHeaderSet, errPeersUnavailable, errTooOld, 295 errInvalidAncestor, errInvalidChain: 296 log.Warn("Synchronisation failed, dropping peer", "peer", id, "err", err) 297 d.dropPeer(id) 298 299 default: 300 log.Warn("Synchronisation failed, retrying", "err", err) 301 } 302 return err 303 } 304 305 // synchronise will select the peer and use it for synchronising. If an empty string is given 306 // it will use the best peer possible and synchronize if it's TD is higher than our own. If any of the 307 // checks fail an error will be returned. 
// This method is synchronous
//
// It returns errBusy when another sync cycle is already running and
// errUnknownPeer when id is not present in the peer set; otherwise it returns
// whatever syncWithPeer (or the test mock, if installed) returns.
func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int, mode SyncMode) error {
	// Mock out the synchronisation if testing
	if d.synchroniseMock != nil {
		return d.synchroniseMock(id, hash)
	}
	// Make sure only one goroutine is ever allowed past this point at once
	if !atomic.CompareAndSwapInt32(&d.synchronising, 0, 1) {
		return errBusy
	}
	defer atomic.StoreInt32(&d.synchronising, 0)

	// Post a user notification of the sync (only once per session)
	if atomic.CompareAndSwapInt32(&d.notified, 0, 1) {
		log.Info("Block synchronisation started")
	}
	// Reset the queue, peer set and wake channels to clean any internal leftover state
	d.queue.Reset()
	d.peers.Reset()

	// Wake channels: a single non-blocking receive per channel.
	for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stateWakeCh} {
		select {
		case <-ch:
		default:
		}
	}
	// Delivery channels: keep receiving until each one is empty, discarding
	// any packets left over from a previous cycle.
	for _, ch := range []chan dataPack{d.headerCh, d.bodyCh, d.receiptCh, d.stateCh} {
		for empty := false; !empty; {
			select {
			case <-ch:
			default:
				empty = true
			}
		}
	}
	// Header processing channel: drained the same way.
	for empty := false; !empty; {
		select {
		case <-d.headerProcCh:
		default:
			empty = true
		}
	}
	// Create cancel channel for aborting mid-flight and mark the master peer
	d.cancelLock.Lock()
	d.cancelCh = make(chan struct{})
	d.cancelPeer = id
	d.cancelLock.Unlock()

	defer d.Cancel() // No matter what, we can't leave the cancel channel open

	// Set the requested sync mode, unless it's forbidden
	d.mode = mode
	// Too many failures in the fast sync critical section: fall back to full sync.
	if d.mode == FastSync && atomic.LoadUint32(&d.fsPivotFails) >= fsCriticalTrials {
		d.mode = FullSync
	}
	// Retrieve the origin peer and initiate the downloading process
	p := d.peers.Peer(id)
	if p == nil {
		return errUnknownPeer
	}
	return d.syncWithPeer(p, hash, td)
}

// syncWithPeer starts a block synchronization based on the hash chain from the
// specified peer and head
// hash.
//
// A StartEvent is posted on entry and either a FailedEvent (carrying the
// error) or a DoneEvent on exit. Peers speaking a protocol version below 62
// are rejected with errTooOld.
func (d *Downloader) syncWithPeer(p *peer, hash common.Hash, td *big.Int) (err error) {
	d.mux.Post(StartEvent{})
	defer func() {
		// Announce the outcome of the cycle based on the named return value
		if err != nil {
			d.mux.Post(FailedEvent{err})
		} else {
			d.mux.Post(DoneEvent{})
		}
	}()
	if p.version < 62 {
		return errTooOld
	}

	log.Debug("Synchronising with the network", "peer", p.id, "eth", p.version, "head", hash, "td", td, "mode", d.mode)
	defer func(start time.Time) {
		log.Debug("Synchronisation terminated", "elapsed", time.Since(start))
	}(time.Now())

	// Look up the sync boundaries: the common ancestor and the target block
	latest, err := d.fetchHeight(p)
	if err != nil {
		return err
	}
	height := latest.Number.Uint64()

	origin, err := d.findAncestor(p, height)
	if err != nil {
		return err
	}
	d.syncStatsLock.Lock()
	// Only move the recorded origin if the previous target was already reached
	// or the new ancestor lies below the recorded origin.
	if d.syncStatsChainHeight <= origin || d.syncStatsChainOrigin > origin {
		d.syncStatsChainOrigin = origin
	}
	d.syncStatsChainHeight = height
	d.syncStatsLock.Unlock()

	// Initiate the sync using a concurrent header and content retrieval algorithm
	pivot := uint64(0)
	switch d.mode {
	case LightSync:
		pivot = height
	case FastSync:
		// Calculate the new fast/slow sync pivot point
		if d.fsPivotLock == nil {
			// Random offset in [0, fsPivotInterval) from the crypto source
			pivotOffset, err := rand.Int(rand.Reader, big.NewInt(int64(fsPivotInterval)))
			if err != nil {
				panic(fmt.Sprintf("Failed to access crypto random source: %v", err))
			}
			// Guard against underflow: the pivot stays 0 on short chains
			if height > uint64(fsMinFullBlocks)+pivotOffset.Uint64() {
				pivot = height - uint64(fsMinFullBlocks) - pivotOffset.Uint64()
			}
		} else {
			// Pivot point locked in, use this and do not pick a new one!
			pivot = d.fsPivotLock.Number.Uint64()
		}
		// If the point is below the origin, move origin back to ensure state download
		if pivot < origin {
			if pivot > 0 {
				origin = pivot - 1
			} else {
				origin = 0
			}
		}
		log.Debug("Fast syncing until pivot block", "pivot", pivot)
	}
	d.queue.Prepare(origin+1, d.mode, pivot, latest)
	if d.syncInitHook != nil {
		d.syncInitHook(origin, height)
	}
	return d.spawnSync(origin+1,
		func() error { return d.fetchHeaders(p, origin+1) }, // Headers are always retrieved
		func() error { return d.processHeaders(origin+1, td) }, // Headers are always processed
		func() error { return d.fetchBodies(origin + 1) }, // Bodies are retrieved during normal and fast sync
		func() error { return d.fetchReceipts(origin + 1) }, // Receipts are retrieved during fast sync
		func() error { return d.fetchNodeData() }, // Node state data is retrieved during fast sync
	)
}

// spawnSync runs d.processContent and all given fetcher functions to completion
// in separate goroutines, returning the first error that appears.
//
// The origin parameter is not referenced by the body.
func (d *Downloader) spawnSync(origin uint64, fetchers ...func() error) error {
	var wg sync.WaitGroup
	// Buffered for every goroutine so none of them blocks on its final send
	errc := make(chan error, len(fetchers)+1)
	wg.Add(len(fetchers) + 1)
	go func() { defer wg.Done(); errc <- d.processContent() }()
	for _, fn := range fetchers {
		fn := fn
		go func() { defer wg.Done(); errc <- fn() }()
	}
	// Wait for the first error, then terminate the others.
	var err error
	for i := 0; i < len(fetchers)+1; i++ {
		if i == len(fetchers) {
			// Close the queue when all fetchers have exited.
			// This will cause the block processor to end when
			// it has processed the queue.
			d.queue.Close()
		}
		if err = <-errc; err != nil {
			break
		}
	}
	// Shut everything down and wait for all goroutines to return
	d.queue.Close()
	d.Cancel()
	wg.Wait()

	// If sync failed in the critical section, bump the fail counter
	if err != nil && d.mode == FastSync && d.fsPivotLock != nil {
		atomic.AddUint32(&d.fsPivotFails, 1)
	}
	return err
}

// Cancel closes the current cancel channel, if one exists and it has not been
// closed already. It has no return value and may be called repeatedly.
func (d *Downloader) Cancel() {
	// Close the current cancel channel
	d.cancelLock.Lock()
	if d.cancelCh != nil {
		select {
		case <-d.cancelCh:
			// Channel was already closed
		default:
			close(d.cancelCh)
		}
	}
	d.cancelLock.Unlock()
}

// Terminate interrupts the downloader, canceling all pending operations.
// The downloader cannot be reused after calling Terminate.
func (d *Downloader) Terminate() {
	// Close the termination channel (make sure double close is allowed)
	d.quitLock.Lock()
	select {
	case <-d.quitCh:
	default:
		close(d.quitCh)
	}
	d.quitLock.Unlock()

	// Cancel any pending download requests
	d.Cancel()
}

// fetchHeight retrieves the head header of the remote peer to aid in estimating
// the total time a pending synchronisation would take.
//
// It returns errCancelBlockFetch if the sync is cancelled while waiting,
// errBadPeer if the peer answers with anything other than exactly one header,
// and errTimeout if no answer arrives within the request TTL.
func (d *Downloader) fetchHeight(p *peer) (*types.Header, error) {
	p.log.Debug("Retrieving remote chain height")

	// Request the advertised remote head block and wait for the response
	head, _ := p.currentHead() // only the head hash is needed here
	go p.getRelHeaders(head, 1, 0, false)

	ttl := d.requestTTL()
	timeout := time.After(ttl)
	for {
		select {
		case <-d.cancelCh:
			return nil, errCancelBlockFetch

		case packet := <-d.headerCh:
			// Discard anything not from the origin peer
			if packet.PeerId() != p.id {
				log.Debug("Received headers from incorrect peer", "peer", packet.PeerId())
				break // leaves the select only; the loop keeps waiting
			}
			// Make sure the peer actually gave something valid
			headers := packet.(*headerPack).headers
			if len(headers) != 1 {
				p.log.Debug("Multiple headers for single request", "headers", len(headers))
				return nil, errBadPeer
			}
			head := headers[0]
			p.log.Debug("Remote head header identified", "number", head.Number, "hash", head.Hash())
			return head, nil

		case <-timeout:
			p.log.Debug("Waiting for head header timed out", "elapsed", ttl)
			return nil, errTimeout

		case <-d.bodyCh:
		case <-d.stateCh:
		case <-d.receiptCh:
			// Out of bounds delivery, ignore
		}
	}
}

// findAncestor tries to locate the common ancestor link of the local chain and
// a remote peers blockchain. In the general case when our node was in sync and
// on the correct chain, checking the top N links should already get us a match.
// In the rare scenario when we ended up on a long reorganisation (i.e. none of
// the head links match), we do a binary search to find the common ancestor.
567 func (d *Downloader) findAncestor(p *peer, height uint64) (uint64, error) { 568 // Figure out the valid ancestor range to prevent rewrite attacks 569 floor, ceil := int64(-1), d.headHeader().Number.Uint64() 570 571 p.log.Debug("Looking for common ancestor", "local", ceil, "remote", height) 572 if d.mode == FullSync { 573 ceil = d.headBlock().NumberU64() 574 } else if d.mode == FastSync { 575 ceil = d.headFastBlock().NumberU64() 576 } 577 if ceil >= MaxForkAncestry { 578 floor = int64(ceil - MaxForkAncestry) 579 } 580 // Request the topmost blocks to short circuit binary ancestor lookup 581 head := ceil 582 if head > height { 583 head = height 584 } 585 from := int64(head) - int64(MaxHeaderFetch) 586 if from < 0 { 587 from = 0 588 } 589 // Span out with 15 block gaps into the future to catch bad head reports 590 limit := 2 * MaxHeaderFetch / 16 591 count := 1 + int((int64(ceil)-from)/16) 592 if count > limit { 593 count = limit 594 } 595 go p.getAbsHeaders(uint64(from), count, 15, false) 596 597 // Wait for the remote response to the head fetch 598 number, hash := uint64(0), common.Hash{} 599 600 ttl := d.requestTTL() 601 timeout := time.After(ttl) 602 603 for finished := false; !finished; { 604 select { 605 case <-d.cancelCh: 606 return 0, errCancelHeaderFetch 607 608 case packet := <-d.headerCh: 609 // Discard anything not from the origin peer 610 if packet.PeerId() != p.id { 611 log.Debug("Received headers from incorrect peer", "peer", packet.PeerId()) 612 break 613 } 614 // Make sure the peer actually gave something valid 615 headers := packet.(*headerPack).headers 616 if len(headers) == 0 { 617 p.log.Warn("Empty head header set") 618 return 0, errEmptyHeaderSet 619 } 620 // Make sure the peer's reply conforms to the request 621 for i := 0; i < len(headers); i++ { 622 if number := headers[i].Number.Int64(); number != from+int64(i)*16 { 623 p.log.Warn("Head headers broke chain ordering", "index", i, "requested", from+int64(i)*16, "received", number) 624 
return 0, errInvalidChain 625 } 626 } 627 // Check if a common ancestor was found 628 finished = true 629 for i := len(headers) - 1; i >= 0; i-- { 630 // Skip any headers that underflow/overflow our requested set 631 if headers[i].Number.Int64() < from || headers[i].Number.Uint64() > ceil { 632 continue 633 } 634 // Otherwise check if we already know the header or not 635 if (d.mode == FullSync && d.hasBlockAndState(headers[i].Hash())) || (d.mode != FullSync && d.hasHeader(headers[i].Hash())) { 636 number, hash = headers[i].Number.Uint64(), headers[i].Hash() 637 638 // If every header is known, even future ones, the peer straight out lied about its head 639 if number > height && i == limit-1 { 640 p.log.Warn("Lied about chain head", "reported", height, "found", number) 641 return 0, errStallingPeer 642 } 643 break 644 } 645 } 646 647 case <-timeout: 648 p.log.Debug("Waiting for head header timed out", "elapsed", ttl) 649 return 0, errTimeout 650 651 case <-d.bodyCh: 652 case <-d.stateCh: 653 case <-d.receiptCh: 654 // Out of bounds delivery, ignore 655 } 656 } 657 // If the head fetch already found an ancestor, return 658 if !common.EmptyHash(hash) { 659 if int64(number) <= floor { 660 p.log.Warn("Ancestor below allowance", "number", number, "hash", hash, "allowance", floor) 661 return 0, errInvalidAncestor 662 } 663 p.log.Debug("Found common ancestor", "number", number, "hash", hash) 664 return number, nil 665 } 666 // Ancestor not found, we need to binary search over our chain 667 start, end := uint64(0), head 668 if floor > 0 { 669 start = uint64(floor) 670 } 671 for start+1 < end { 672 // Split our chain interval in two, and request the hash to cross check 673 check := (start + end) / 2 674 675 ttl := d.requestTTL() 676 timeout := time.After(ttl) 677 678 go p.getAbsHeaders(uint64(check), 1, 0, false) 679 680 // Wait until a reply arrives to this request 681 for arrived := false; !arrived; { 682 select { 683 case <-d.cancelCh: 684 return 0, errCancelHeaderFetch 
685 686 case packer := <-d.headerCh: 687 // Discard anything not from the origin peer 688 if packer.PeerId() != p.id { 689 log.Debug("Received headers from incorrect peer", "peer", packer.PeerId()) 690 break 691 } 692 // Make sure the peer actually gave something valid 693 headers := packer.(*headerPack).headers 694 if len(headers) != 1 { 695 p.log.Debug("Multiple headers for single request", "headers", len(headers)) 696 return 0, errBadPeer 697 } 698 arrived = true 699 700 // Modify the search interval based on the response 701 if (d.mode == FullSync && !d.hasBlockAndState(headers[0].Hash())) || (d.mode != FullSync && !d.hasHeader(headers[0].Hash())) { 702 end = check 703 break 704 } 705 header := d.getHeader(headers[0].Hash()) // Independent of sync mode, header surely exists 706 if header.Number.Uint64() != check { 707 p.log.Debug("Received non requested header", "number", header.Number, "hash", header.Hash(), "request", check) 708 return 0, errBadPeer 709 } 710 start = check 711 712 case <-timeout: 713 p.log.Debug("Waiting for search header timed out", "elapsed", ttl) 714 return 0, errTimeout 715 716 case <-d.bodyCh: 717 case <-d.stateCh: 718 case <-d.receiptCh: 719 // Out of bounds delivery, ignore 720 } 721 } 722 } 723 // Ensure valid ancestry and return 724 if int64(start) <= floor { 725 p.log.Warn("Ancestor below allowance", "number", start, "hash", hash, "allowance", floor) 726 return 0, errInvalidAncestor 727 } 728 p.log.Debug("Found common ancestor", "number", start, "hash", hash) 729 return start, nil 730 } 731 732 // fetchHeaders keeps retrieving headers concurrently from the number 733 // requested, until no more are returned, potentially throttling on the way. To 734 // facilitate concurrency but still protect against malicious nodes sending bad 735 // headers, we construct a header chain skeleton using the "origin" peer we are 736 // syncing with, and fill in the missing headers using anyone else. 
// Headers from
// other peers are only accepted if they map cleanly to the skeleton. If no one
// can fill in the skeleton - not even the origin peer - it's assumed invalid and
// the origin is dropped.
//
// It returns nil once the peer reports no more headers, errCancelHeaderFetch
// if the sync is cancelled, errInvalidChain if the skeleton cannot be filled,
// and errBadPeer if a header request times out.
func (d *Downloader) fetchHeaders(p *peer, from uint64) error {
	p.log.Debug("Directing header downloads", "origin", from)
	defer p.log.Debug("Header download terminated")

	// Create a timeout timer, and the associated header fetcher
	skeleton := true            // Skeleton assembly phase or finishing up
	request := time.Now()       // time of the last skeleton fetch request
	timeout := time.NewTimer(0) // timer to dump a non-responsive active peer
	<-timeout.C                 // timeout channel should be initially empty
	defer timeout.Stop()

	var ttl time.Duration
	// getHeaders re-arms the timeout and fires the next request: a sparse
	// skeleton request while in skeleton mode, a contiguous batch otherwise.
	getHeaders := func(from uint64) {
		request = time.Now()

		ttl = d.requestTTL()
		timeout.Reset(ttl)

		if skeleton {
			p.log.Trace("Fetching skeleton headers", "count", MaxHeaderFetch, "from", from)
			go p.getAbsHeaders(from+uint64(MaxHeaderFetch)-1, MaxSkeletonSize, MaxHeaderFetch-1, false)
		} else {
			p.log.Trace("Fetching full headers", "count", MaxHeaderFetch, "from", from)
			go p.getAbsHeaders(from, MaxHeaderFetch, 0, false)
		}
	}
	// Start pulling the header chain skeleton until all is done
	getHeaders(from)

	for {
		select {
		case <-d.cancelCh:
			return errCancelHeaderFetch

		case packet := <-d.headerCh:
			// Make sure the active peer is giving us the skeleton headers
			if packet.PeerId() != p.id {
				log.Debug("Received skeleton from incorrect peer", "peer", packet.PeerId())
				break // leaves the select only; the pending timeout stays armed
			}
			headerReqTimer.UpdateSince(request)
			timeout.Stop()

			// If the skeleton's finished, pull any remaining head headers directly from the origin
			if packet.Items() == 0 && skeleton {
				skeleton = false
				getHeaders(from)
				continue
			}
			// If no more headers are inbound, notify the content fetchers and return
			if packet.Items() == 0 {
				p.log.Debug("No more headers available")
				select {
				case d.headerProcCh <- nil:
					return nil
				case <-d.cancelCh:
					return errCancelHeaderFetch
				}
			}
			headers := packet.(*headerPack).headers

			// If we received a skeleton batch, resolve internals concurrently
			if skeleton {
				filled, proced, err := d.fillHeaderSkeleton(from, headers)
				if err != nil {
					p.log.Debug("Skeleton chain invalid", "err", err)
					return errInvalidChain
				}
				// The first proced headers were already forwarded for processing
				headers = filled[proced:]
				from += uint64(proced)
			}
			// Insert all the new headers and fetch the next batch
			if len(headers) > 0 {
				p.log.Trace("Scheduling new headers", "count", len(headers), "from", from)
				select {
				case d.headerProcCh <- headers:
				case <-d.cancelCh:
					return errCancelHeaderFetch
				}
				from += uint64(len(headers))
			}
			getHeaders(from)

		case <-timeout.C:
			// Header retrieval timed out, consider the peer bad and drop
			p.log.Debug("Header request timed out", "elapsed", ttl)
			headerTimeoutMeter.Mark(1)
			d.dropPeer(p.id)

			// Finish the sync gracefully instead of dumping the gathered data though
			for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stateWakeCh} {
				select {
				case ch <- false:
				case <-d.cancelCh:
				}
			}
			select {
			case d.headerProcCh <- nil:
			case <-d.cancelCh:
			}
			return errBadPeer
		}
	}
}

// fillHeaderSkeleton concurrently retrieves headers from all our available peers
// and maps them to the provided skeleton header chain.
//
// Any partial results from the beginning of the skeleton is (if possible) forwarded
// immediately to the header processor to keep the rest of the pipeline full even
// in the case of header stalls.
//
// The method returns the entire filled skeleton and also the number of headers
// already forwarded for processing.
854 func (d *Downloader) fillHeaderSkeleton(from uint64, skeleton []*types.Header) ([]*types.Header, int, error) { 855 log.Debug("Filling up skeleton", "from", from) 856 d.queue.ScheduleSkeleton(from, skeleton) 857 858 var ( 859 deliver = func(packet dataPack) (int, error) { 860 pack := packet.(*headerPack) 861 return d.queue.DeliverHeaders(pack.peerId, pack.headers, d.headerProcCh) 862 } 863 expire = func() map[string]int { return d.queue.ExpireHeaders(d.requestTTL()) } 864 throttle = func() bool { return false } 865 reserve = func(p *peer, count int) (*fetchRequest, bool, error) { 866 return d.queue.ReserveHeaders(p, count), false, nil 867 } 868 fetch = func(p *peer, req *fetchRequest) error { return p.FetchHeaders(req.From, MaxHeaderFetch) } 869 capacity = func(p *peer) int { return p.HeaderCapacity(d.requestRTT()) } 870 setIdle = func(p *peer, accepted int) { p.SetHeadersIdle(accepted) } 871 ) 872 err := d.fetchParts(errCancelHeaderFetch, d.headerCh, deliver, d.queue.headerContCh, expire, 873 d.queue.PendingHeaders, d.queue.InFlightHeaders, throttle, reserve, 874 nil, fetch, d.queue.CancelHeaders, capacity, d.peers.HeaderIdlePeers, setIdle, "headers") 875 876 log.Debug("Skeleton fill terminated", "err", err) 877 878 filled, proced := d.queue.RetrieveHeaders() 879 return filled, proced, err 880 } 881 882 // fetchBodies iteratively downloads the scheduled block bodies, taking any 883 // available peers, reserving a chunk of blocks for each, waiting for delivery 884 // and also periodically checking for timeouts. 
885 func (d *Downloader) fetchBodies(from uint64) error { 886 log.Debug("Downloading block bodies", "origin", from) 887 888 var ( 889 deliver = func(packet dataPack) (int, error) { 890 pack := packet.(*bodyPack) 891 return d.queue.DeliverBodies(pack.peerId, pack.transactions, pack.uncles) 892 } 893 expire = func() map[string]int { return d.queue.ExpireBodies(d.requestTTL()) } 894 fetch = func(p *peer, req *fetchRequest) error { return p.FetchBodies(req) } 895 capacity = func(p *peer) int { return p.BlockCapacity(d.requestRTT()) } 896 setIdle = func(p *peer, accepted int) { p.SetBodiesIdle(accepted) } 897 ) 898 err := d.fetchParts(errCancelBodyFetch, d.bodyCh, deliver, d.bodyWakeCh, expire, 899 d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ShouldThrottleBlocks, d.queue.ReserveBodies, 900 d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, d.peers.BodyIdlePeers, setIdle, "bodies") 901 902 log.Debug("Block body download terminated", "err", err) 903 return err 904 } 905 906 // fetchReceipts iteratively downloads the scheduled block receipts, taking any 907 // available peers, reserving a chunk of receipts for each, waiting for delivery 908 // and also periodically checking for timeouts. 
909 func (d *Downloader) fetchReceipts(from uint64) error { 910 log.Debug("Downloading transaction receipts", "origin", from) 911 912 var ( 913 deliver = func(packet dataPack) (int, error) { 914 pack := packet.(*receiptPack) 915 return d.queue.DeliverReceipts(pack.peerId, pack.receipts) 916 } 917 expire = func() map[string]int { return d.queue.ExpireReceipts(d.requestTTL()) } 918 fetch = func(p *peer, req *fetchRequest) error { return p.FetchReceipts(req) } 919 capacity = func(p *peer) int { return p.ReceiptCapacity(d.requestRTT()) } 920 setIdle = func(p *peer, accepted int) { p.SetReceiptsIdle(accepted) } 921 ) 922 err := d.fetchParts(errCancelReceiptFetch, d.receiptCh, deliver, d.receiptWakeCh, expire, 923 d.queue.PendingReceipts, d.queue.InFlightReceipts, d.queue.ShouldThrottleReceipts, d.queue.ReserveReceipts, 924 d.receiptFetchHook, fetch, d.queue.CancelReceipts, capacity, d.peers.ReceiptIdlePeers, setIdle, "receipts") 925 926 log.Debug("Transaction receipt download terminated", "err", err) 927 return err 928 } 929 930 // fetchNodeData iteratively downloads the scheduled state trie nodes, taking any 931 // available peers, reserving a chunk of nodes for each, waiting for delivery and 932 // also periodically checking for timeouts. 
933 func (d *Downloader) fetchNodeData() error { 934 log.Debug("Downloading node state data") 935 936 var ( 937 deliver = func(packet dataPack) (int, error) { 938 start := time.Now() 939 return d.queue.DeliverNodeData(packet.PeerId(), packet.(*statePack).states, func(delivered int, progressed bool, err error) { 940 // If the peer returned old-requested data, forgive 941 if err == trie.ErrNotRequested { 942 log.Debug("Forgiving reply to stale state request", "peer", packet.PeerId()) 943 return 944 } 945 if err != nil { 946 // If the node data processing failed, the root hash is very wrong, abort 947 log.Error("State processing failed", "peer", packet.PeerId(), "err", err) 948 d.Cancel() 949 return 950 } 951 // Processing succeeded, notify state fetcher of continuation 952 pending := d.queue.PendingNodeData() 953 if pending > 0 { 954 select { 955 case d.stateWakeCh <- true: 956 default: 957 } 958 } 959 d.syncStatsLock.Lock() 960 d.syncStatsStateDone += uint64(delivered) 961 syncStatsStateDone := d.syncStatsStateDone // Thread safe copy for the log below 962 d.syncStatsLock.Unlock() 963 964 // If real database progress was made, reset any fast-sync pivot failure 965 if progressed && atomic.LoadUint32(&d.fsPivotFails) > 1 { 966 log.Debug("Fast-sync progressed, resetting fail counter", "previous", atomic.LoadUint32(&d.fsPivotFails)) 967 atomic.StoreUint32(&d.fsPivotFails, 1) // Don't ever reset to 0, as that will unlock the pivot block 968 } 969 // Log a message to the user and return 970 if delivered > 0 { 971 log.Info("Imported new state entries", "count", delivered, "elapsed", common.PrettyDuration(time.Since(start)), "processed", syncStatsStateDone, "pending", pending) 972 } 973 }) 974 } 975 expire = func() map[string]int { return d.queue.ExpireNodeData(d.requestTTL()) } 976 throttle = func() bool { return false } 977 reserve = func(p *peer, count int) (*fetchRequest, bool, error) { 978 return d.queue.ReserveNodeData(p, count), false, nil 979 } 980 fetch = func(p 
*peer, req *fetchRequest) error { return p.FetchNodeData(req) } 981 capacity = func(p *peer) int { return p.NodeDataCapacity(d.requestRTT()) } 982 setIdle = func(p *peer, accepted int) { p.SetNodeDataIdle(accepted) } 983 ) 984 err := d.fetchParts(errCancelStateFetch, d.stateCh, deliver, d.stateWakeCh, expire, 985 d.queue.PendingNodeData, d.queue.InFlightNodeData, throttle, reserve, nil, fetch, 986 d.queue.CancelNodeData, capacity, d.peers.NodeDataIdlePeers, setIdle, "states") 987 988 log.Debug("Node state data download terminated", "err", err) 989 return err 990 } 991 992 // fetchParts iteratively downloads scheduled block parts, taking any available 993 // peers, reserving a chunk of fetch requests for each, waiting for delivery and 994 // also periodically checking for timeouts. 995 // 996 // As the scheduling/timeout logic mostly is the same for all downloaded data 997 // types, this method is used by each for data gathering and is instrumented with 998 // various callbacks to handle the slight differences between processing them. 
999 // 1000 // The instrumentation parameters: 1001 // - errCancel: error type to return if the fetch operation is cancelled (mostly makes logging nicer) 1002 // - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) 1003 // - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) 1004 // - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) 1005 // - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) 1006 // - pending: task callback for the number of requests still needing download (detect completion/non-completability) 1007 // - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) 1008 // - throttle: task callback to check if the processing queue is full and activate throttling (bound memory use) 1009 // - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) 1010 // - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) 1011 // - fetch: network callback to actually send a particular download request to a physical remote peer 1012 // - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) 1013 // - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) 1014 // - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks 1015 // - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) 1016 // - kind: textual label of the type being downloaded to display in log mesages 1017 func (d *Downloader) fetchParts(errCancel error, deliveryCh chan dataPack, deliver func(dataPack) (int, error), wakeCh 
chan bool, 1018 expire func() map[string]int, pending func() int, inFlight func() bool, throttle func() bool, reserve func(*peer, int) (*fetchRequest, bool, error), 1019 fetchHook func([]*types.Header), fetch func(*peer, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peer) int, 1020 idle func() ([]*peer, int), setIdle func(*peer, int), kind string) error { 1021 1022 // Create a ticker to detect expired retrieval tasks 1023 ticker := time.NewTicker(100 * time.Millisecond) 1024 defer ticker.Stop() 1025 1026 update := make(chan struct{}, 1) 1027 1028 // Prepare the queue and fetch block parts until the block header fetcher's done 1029 finished := false 1030 for { 1031 select { 1032 case <-d.cancelCh: 1033 return errCancel 1034 1035 case packet := <-deliveryCh: 1036 // If the peer was previously banned and failed to deliver it's pack 1037 // in a reasonable time frame, ignore it's message. 1038 if peer := d.peers.Peer(packet.PeerId()); peer != nil { 1039 // Deliver the received chunk of data and check chain validity 1040 accepted, err := deliver(packet) 1041 if err == errInvalidChain { 1042 return err 1043 } 1044 // Unless a peer delivered something completely else than requested (usually 1045 // caused by a timed out request which came through in the end), set it to 1046 // idle. If the delivery's stale, the peer should have already been idled. 
1047 if err != errStaleDelivery { 1048 setIdle(peer, accepted) 1049 } 1050 // Issue a log to the user to see what's going on 1051 switch { 1052 case err == nil && packet.Items() == 0: 1053 peer.log.Trace("Requested data not delivered", "type", kind) 1054 case err == nil: 1055 peer.log.Trace("Delivered new batch of data", "type", kind, "count", packet.Stats()) 1056 default: 1057 peer.log.Trace("Failed to deliver retrieved data", "type", kind, "err", err) 1058 } 1059 } 1060 // Blocks assembled, try to update the progress 1061 select { 1062 case update <- struct{}{}: 1063 default: 1064 } 1065 1066 case cont := <-wakeCh: 1067 // The header fetcher sent a continuation flag, check if it's done 1068 if !cont { 1069 finished = true 1070 } 1071 // Headers arrive, try to update the progress 1072 select { 1073 case update <- struct{}{}: 1074 default: 1075 } 1076 1077 case <-ticker.C: 1078 // Sanity check update the progress 1079 select { 1080 case update <- struct{}{}: 1081 default: 1082 } 1083 1084 case <-update: 1085 // Short circuit if we lost all our peers 1086 if d.peers.Len() == 0 { 1087 return errNoPeers 1088 } 1089 // Check for fetch request timeouts and demote the responsible peers 1090 for pid, fails := range expire() { 1091 if peer := d.peers.Peer(pid); peer != nil { 1092 // If a lot of retrieval elements expired, we might have overestimated the remote peer or perhaps 1093 // ourselves. Only reset to minimal throughput but don't drop just yet. If even the minimal times 1094 // out that sync wise we need to get rid of the peer. 1095 // 1096 // The reason the minimum threshold is 2 is because the downloader tries to estimate the bandwidth 1097 // and latency of a peer separately, which requires pushing the measures capacity a bit and seeing 1098 // how response times reacts, to it always requests one more than the minimum (i.e. min 2). 
1099 if fails > 2 { 1100 peer.log.Trace("Data delivery timed out", "type", kind) 1101 setIdle(peer, 0) 1102 } else { 1103 peer.log.Debug("Stalling delivery, dropping", "type", kind) 1104 d.dropPeer(pid) 1105 } 1106 } 1107 } 1108 // If there's nothing more to fetch, wait or terminate 1109 if pending() == 0 { 1110 if !inFlight() && finished { 1111 log.Debug("Data fetching completed", "type", kind) 1112 return nil 1113 } 1114 break 1115 } 1116 // Send a download request to all idle peers, until throttled 1117 progressed, throttled, running := false, false, inFlight() 1118 idles, total := idle() 1119 1120 for _, peer := range idles { 1121 // Short circuit if throttling activated 1122 if throttle() { 1123 throttled = true 1124 break 1125 } 1126 // Reserve a chunk of fetches for a peer. A nil can mean either that 1127 // no more headers are available, or that the peer is known not to 1128 // have them. 1129 request, progress, err := reserve(peer, capacity(peer)) 1130 if err != nil { 1131 return err 1132 } 1133 if progress { 1134 progressed = true 1135 } 1136 if request == nil { 1137 continue 1138 } 1139 if request.From > 0 { 1140 peer.log.Trace("Requesting new batch of data", "type", kind, "from", request.From) 1141 } else if len(request.Headers) > 0 { 1142 peer.log.Trace("Requesting new batch of data", "type", kind, "count", len(request.Headers), "from", request.Headers[0].Number) 1143 } else { 1144 peer.log.Trace("Requesting new batch of data", "type", kind, "count", len(request.Hashes)) 1145 } 1146 // Fetch the chunk and make sure any errors return the hashes to the queue 1147 if fetchHook != nil { 1148 fetchHook(request.Headers) 1149 } 1150 if err := fetch(peer, request); err != nil { 1151 // Although we could try and make an attempt to fix this, this error really 1152 // means that we've double allocated a fetch task to a peer. 
If that is the 1153 // case, the internal state of the downloader and the queue is very wrong so 1154 // better hard crash and note the error instead of silently accumulating into 1155 // a much bigger issue. 1156 panic(fmt.Sprintf("%v: %s fetch assignment failed", peer, kind)) 1157 } 1158 running = true 1159 } 1160 // Make sure that we have peers available for fetching. If all peers have been tried 1161 // and all failed throw an error 1162 if !progressed && !throttled && !running && len(idles) == total && pending() > 0 { 1163 return errPeersUnavailable 1164 } 1165 } 1166 } 1167 } 1168 1169 // processHeaders takes batches of retrieved headers from an input channel and 1170 // keeps processing and scheduling them into the header chain and downloader's 1171 // queue until the stream ends or a failure occurs. 1172 func (d *Downloader) processHeaders(origin uint64, td *big.Int) error { 1173 // Calculate the pivoting point for switching from fast to slow sync 1174 pivot := d.queue.FastSyncPivot() 1175 1176 // Keep a count of uncertain headers to roll back 1177 rollback := []*types.Header{} 1178 defer func() { 1179 if len(rollback) > 0 { 1180 // Flatten the headers and roll them back 1181 hashes := make([]common.Hash, len(rollback)) 1182 for i, header := range rollback { 1183 hashes[i] = header.Hash() 1184 } 1185 lastHeader, lastFastBlock, lastBlock := d.headHeader().Number, common.Big0, common.Big0 1186 if d.headFastBlock != nil { 1187 lastFastBlock = d.headFastBlock().Number() 1188 } 1189 if d.headBlock != nil { 1190 lastBlock = d.headBlock().Number() 1191 } 1192 d.rollback(hashes) 1193 curFastBlock, curBlock := common.Big0, common.Big0 1194 if d.headFastBlock != nil { 1195 curFastBlock = d.headFastBlock().Number() 1196 } 1197 if d.headBlock != nil { 1198 curBlock = d.headBlock().Number() 1199 } 1200 log.Warn("Rolled back headers", "count", len(hashes), 1201 "header", fmt.Sprintf("%d->%d", lastHeader, d.headHeader().Number), 1202 "fast", fmt.Sprintf("%d->%d", 
lastFastBlock, curFastBlock), 1203 "block", fmt.Sprintf("%d->%d", lastBlock, curBlock)) 1204 1205 // If we're already past the pivot point, this could be an attack, thread carefully 1206 if rollback[len(rollback)-1].Number.Uint64() > pivot { 1207 // If we didn't ever fail, lock in te pivot header (must! not! change!) 1208 if atomic.LoadUint32(&d.fsPivotFails) == 0 { 1209 for _, header := range rollback { 1210 if header.Number.Uint64() == pivot { 1211 log.Warn("Fast-sync pivot locked in", "number", pivot, "hash", header.Hash()) 1212 d.fsPivotLock = header 1213 } 1214 } 1215 } 1216 } 1217 } 1218 }() 1219 1220 // Wait for batches of headers to process 1221 gotHeaders := false 1222 1223 for { 1224 select { 1225 case <-d.cancelCh: 1226 return errCancelHeaderProcessing 1227 1228 case headers := <-d.headerProcCh: 1229 // Terminate header processing if we synced up 1230 if len(headers) == 0 { 1231 // Notify everyone that headers are fully processed 1232 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stateWakeCh} { 1233 select { 1234 case ch <- false: 1235 case <-d.cancelCh: 1236 } 1237 } 1238 // If no headers were retrieved at all, the peer violated it's TD promise that it had a 1239 // better chain compared to ours. The only exception is if it's promised blocks were 1240 // already imported by other means (e.g. 
fecher): 1241 // 1242 // R <remote peer>, L <local node>: Both at block 10 1243 // R: Mine block 11, and propagate it to L 1244 // L: Queue block 11 for import 1245 // L: Notice that R's head and TD increased compared to ours, start sync 1246 // L: Import of block 11 finishes 1247 // L: Sync begins, and finds common ancestor at 11 1248 // L: Request new headers up from 11 (R's TD was higher, it must have something) 1249 // R: Nothing to give 1250 if d.mode != LightSync { 1251 if !gotHeaders && td.Cmp(d.getTd(d.headBlock().Hash())) > 0 { 1252 return errStallingPeer 1253 } 1254 } 1255 // If fast or light syncing, ensure promised headers are indeed delivered. This is 1256 // needed to detect scenarios where an attacker feeds a bad pivot and then bails out 1257 // of delivering the post-pivot blocks that would flag the invalid content. 1258 // 1259 // This check cannot be executed "as is" for full imports, since blocks may still be 1260 // queued for processing when the header download completes. However, as long as the 1261 // peer gave us something useful, we're already happy/progressed (above check). 
1262 if d.mode == FastSync || d.mode == LightSync { 1263 if td.Cmp(d.getTd(d.headHeader().Hash())) > 0 { 1264 return errStallingPeer 1265 } 1266 } 1267 // Disable any rollback and return 1268 rollback = nil 1269 return nil 1270 } 1271 // Otherwise split the chunk of headers into batches and process them 1272 gotHeaders = true 1273 1274 for len(headers) > 0 { 1275 // Terminate if something failed in between processing chunks 1276 select { 1277 case <-d.cancelCh: 1278 return errCancelHeaderProcessing 1279 default: 1280 } 1281 // Select the next chunk of headers to import 1282 limit := maxHeadersProcess 1283 if limit > len(headers) { 1284 limit = len(headers) 1285 } 1286 chunk := headers[:limit] 1287 1288 // In case of header only syncing, validate the chunk immediately 1289 if d.mode == FastSync || d.mode == LightSync { 1290 // Collect the yet unknown headers to mark them as uncertain 1291 unknown := make([]*types.Header, 0, len(headers)) 1292 for _, header := range chunk { 1293 if !d.hasHeader(header.Hash()) { 1294 unknown = append(unknown, header) 1295 } 1296 } 1297 // If we're importing pure headers, verify based on their recentness 1298 frequency := fsHeaderCheckFrequency 1299 if chunk[len(chunk)-1].Number.Uint64()+uint64(fsHeaderForceVerify) > pivot { 1300 frequency = 1 1301 } 1302 if n, err := d.insertHeaders(chunk, frequency); err != nil { 1303 // If some headers were inserted, add them too to the rollback list 1304 if n > 0 { 1305 rollback = append(rollback, chunk[:n]...) 1306 } 1307 log.Debug("Invalid header encountered", "number", chunk[n].Number, "hash", chunk[n].Hash(), "err", err) 1308 return errInvalidChain 1309 } 1310 // All verifications passed, store newly found uncertain headers 1311 rollback = append(rollback, unknown...) 1312 if len(rollback) > fsHeaderSafetyNet { 1313 rollback = append(rollback[:0], rollback[len(rollback)-fsHeaderSafetyNet:]...) 
1314 } 1315 } 1316 // If we're fast syncing and just pulled in the pivot, make sure it's the one locked in 1317 if d.mode == FastSync && d.fsPivotLock != nil && chunk[0].Number.Uint64() <= pivot && chunk[len(chunk)-1].Number.Uint64() >= pivot { 1318 if pivot := chunk[int(pivot-chunk[0].Number.Uint64())]; pivot.Hash() != d.fsPivotLock.Hash() { 1319 log.Warn("Pivot doesn't match locked in one", "remoteNumber", pivot.Number, "remoteHash", pivot.Hash(), "localNumber", d.fsPivotLock.Number, "localHash", d.fsPivotLock.Hash()) 1320 return errInvalidChain 1321 } 1322 } 1323 // Unless we're doing light chains, schedule the headers for associated content retrieval 1324 if d.mode == FullSync || d.mode == FastSync { 1325 // If we've reached the allowed number of pending headers, stall a bit 1326 for d.queue.PendingBlocks() >= maxQueuedHeaders || d.queue.PendingReceipts() >= maxQueuedHeaders { 1327 select { 1328 case <-d.cancelCh: 1329 return errCancelHeaderProcessing 1330 case <-time.After(time.Second): 1331 } 1332 } 1333 // Otherwise insert the headers for content retrieval 1334 inserts := d.queue.Schedule(chunk, origin) 1335 if len(inserts) != len(chunk) { 1336 log.Debug("Stale headers") 1337 return errBadPeer 1338 } 1339 } 1340 headers = headers[limit:] 1341 origin += uint64(limit) 1342 } 1343 // Signal the content downloaders of the availablility of new tasks 1344 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh, d.stateWakeCh} { 1345 select { 1346 case ch <- true: 1347 default: 1348 } 1349 } 1350 } 1351 } 1352 } 1353 1354 // processContent takes fetch results from the queue and tries to import them 1355 // into the chain. The type of import operation will depend on the result contents. 
1356 func (d *Downloader) processContent() error { 1357 pivot := d.queue.FastSyncPivot() 1358 for { 1359 results := d.queue.WaitResults() 1360 if len(results) == 0 { 1361 return nil // queue empty 1362 } 1363 if d.chainInsertHook != nil { 1364 d.chainInsertHook(results) 1365 } 1366 // Actually import the blocks 1367 first, last := results[0].Header, results[len(results)-1].Header 1368 log.Debug("Inserting downloaded chain", "items", len(results), 1369 "firstnum", first.Number, "firsthash", first.Hash(), 1370 "lastnum", last.Number, "lasthash", last.Hash(), 1371 ) 1372 for len(results) != 0 { 1373 // Check for any termination requests 1374 select { 1375 case <-d.quitCh: 1376 return errCancelContentProcessing 1377 default: 1378 } 1379 // Retrieve the a batch of results to import 1380 var ( 1381 blocks = make([]*types.Block, 0, maxResultsProcess) 1382 receipts = make([]types.Receipts, 0, maxResultsProcess) 1383 ) 1384 items := int(math.Min(float64(len(results)), float64(maxResultsProcess))) 1385 for _, result := range results[:items] { 1386 switch { 1387 case d.mode == FullSync: 1388 blocks = append(blocks, types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles)) 1389 case d.mode == FastSync: 1390 blocks = append(blocks, types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles)) 1391 if result.Header.Number.Uint64() <= pivot { 1392 receipts = append(receipts, result.Receipts) 1393 } 1394 } 1395 } 1396 // Try to process the results, aborting if there's an error 1397 var ( 1398 err error 1399 index int 1400 ) 1401 switch { 1402 case len(receipts) > 0: 1403 index, err = d.insertReceipts(blocks, receipts) 1404 if err == nil && blocks[len(blocks)-1].NumberU64() == pivot { 1405 log.Debug("Committing block as new head", "number", blocks[len(blocks)-1].Number(), "hash", blocks[len(blocks)-1].Hash()) 1406 index, err = len(blocks)-1, d.commitHeadBlock(blocks[len(blocks)-1].Hash()) 1407 } 1408 default: 1409 index, err = 
d.insertBlocks(blocks) 1410 } 1411 if err != nil { 1412 log.Debug("Downloaded item processing failed", "number", results[index].Header.Number, "hash", results[index].Header.Hash(), "err", err) 1413 return errInvalidChain 1414 } 1415 // Shift the results to the next batch 1416 results = results[items:] 1417 } 1418 } 1419 } 1420 1421 // DeliverHeaders injects a new batch of block headers received from a remote 1422 // node into the download schedule. 1423 func (d *Downloader) DeliverHeaders(id string, headers []*types.Header) (err error) { 1424 return d.deliver(id, d.headerCh, &headerPack{id, headers}, headerInMeter, headerDropMeter) 1425 } 1426 1427 // DeliverBodies injects a new batch of block bodies received from a remote node. 1428 func (d *Downloader) DeliverBodies(id string, transactions [][]*types.Transaction, uncles [][]*types.Header) (err error) { 1429 return d.deliver(id, d.bodyCh, &bodyPack{id, transactions, uncles}, bodyInMeter, bodyDropMeter) 1430 } 1431 1432 // DeliverReceipts injects a new batch of receipts received from a remote node. 1433 func (d *Downloader) DeliverReceipts(id string, receipts [][]*types.Receipt) (err error) { 1434 return d.deliver(id, d.receiptCh, &receiptPack{id, receipts}, receiptInMeter, receiptDropMeter) 1435 } 1436 1437 // DeliverNodeData injects a new batch of node state data received from a remote node. 1438 func (d *Downloader) DeliverNodeData(id string, data [][]byte) (err error) { 1439 return d.deliver(id, d.stateCh, &statePack{id, data}, stateInMeter, stateDropMeter) 1440 } 1441 1442 // deliver injects a new batch of data received from a remote node. 
1443 func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { 1444 // Update the delivery metrics for both good and failed deliveries 1445 inMeter.Mark(int64(packet.Items())) 1446 defer func() { 1447 if err != nil { 1448 dropMeter.Mark(int64(packet.Items())) 1449 } 1450 }() 1451 // Deliver or abort if the sync is canceled while queuing 1452 d.cancelLock.RLock() 1453 cancel := d.cancelCh 1454 d.cancelLock.RUnlock() 1455 if cancel == nil { 1456 return errNoSyncActive 1457 } 1458 select { 1459 case destCh <- packet: 1460 return nil 1461 case <-cancel: 1462 return errNoSyncActive 1463 } 1464 } 1465 1466 // qosTuner is the quality of service tuning loop that occasionally gathers the 1467 // peer latency statistics and updates the estimated request round trip time. 1468 func (d *Downloader) qosTuner() { 1469 for { 1470 // Retrieve the current median RTT and integrate into the previoust target RTT 1471 rtt := time.Duration(float64(1-qosTuningImpact)*float64(atomic.LoadUint64(&d.rttEstimate)) + qosTuningImpact*float64(d.peers.medianRTT())) 1472 atomic.StoreUint64(&d.rttEstimate, uint64(rtt)) 1473 1474 // A new RTT cycle passed, increase our confidence in the estimated RTT 1475 conf := atomic.LoadUint64(&d.rttConfidence) 1476 conf = conf + (1000000-conf)/2 1477 atomic.StoreUint64(&d.rttConfidence, conf) 1478 1479 // Log the new QoS values and sleep until the next RTT 1480 log.Debug("Recalculated downloader QoS values", "rtt", rtt, "confidence", float64(conf)/1000000.0, "ttl", d.requestTTL()) 1481 select { 1482 case <-d.quitCh: 1483 return 1484 case <-time.After(rtt): 1485 } 1486 } 1487 } 1488 1489 // qosReduceConfidence is meant to be called when a new peer joins the downloader's 1490 // peer set, needing to reduce the confidence we have in out QoS estimates. 
1491 func (d *Downloader) qosReduceConfidence() { 1492 // If we have a single peer, confidence is always 1 1493 peers := uint64(d.peers.Len()) 1494 if peers == 0 { 1495 // Ensure peer connectivity races don't catch us off guard 1496 return 1497 } 1498 if peers == 1 { 1499 atomic.StoreUint64(&d.rttConfidence, 1000000) 1500 return 1501 } 1502 // If we have a ton of peers, don't drop confidence) 1503 if peers >= uint64(qosConfidenceCap) { 1504 return 1505 } 1506 // Otherwise drop the confidence factor 1507 conf := atomic.LoadUint64(&d.rttConfidence) * (peers - 1) / peers 1508 if float64(conf)/1000000 < rttMinConfidence { 1509 conf = uint64(rttMinConfidence * 1000000) 1510 } 1511 atomic.StoreUint64(&d.rttConfidence, conf) 1512 1513 rtt := time.Duration(atomic.LoadUint64(&d.rttEstimate)) 1514 log.Debug("Relaxed downloader QoS values", "rtt", rtt, "confidence", float64(conf)/1000000.0, "ttl", d.requestTTL()) 1515 } 1516 1517 // requestRTT returns the current target round trip time for a download request 1518 // to complete in. 1519 // 1520 // Note, the returned RTT is .9 of the actually estimated RTT. The reason is that 1521 // the downloader tries to adapt queries to the RTT, so multiple RTT values can 1522 // be adapted to, but smaller ones are preffered (stabler download stream). 1523 func (d *Downloader) requestRTT() time.Duration { 1524 return time.Duration(atomic.LoadUint64(&d.rttEstimate)) * 9 / 10 1525 } 1526 1527 // requestTTL returns the current timeout allowance for a single download request 1528 // to finish under. 1529 func (d *Downloader) requestTTL() time.Duration { 1530 var ( 1531 rtt = time.Duration(atomic.LoadUint64(&d.rttEstimate)) 1532 conf = float64(atomic.LoadUint64(&d.rttConfidence)) / 1000000.0 1533 ) 1534 ttl := time.Duration(ttlScaling) * time.Duration(float64(rtt)/conf) 1535 if ttl > ttlLimit { 1536 ttl = ttlLimit 1537 } 1538 return ttl 1539 }