github.com/dominant-strategies/go-quai@v0.28.2/eth/downloader/downloader.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package downloader contains the manual full chain synchronisation. 18 package downloader 19 20 import ( 21 "errors" 22 "fmt" 23 "math/big" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 quai "github.com/dominant-strategies/go-quai" 29 "github.com/dominant-strategies/go-quai/common" 30 "github.com/dominant-strategies/go-quai/consensus" 31 "github.com/dominant-strategies/go-quai/core/state/snapshot" 32 "github.com/dominant-strategies/go-quai/core/types" 33 "github.com/dominant-strategies/go-quai/eth/protocols/eth" 34 "github.com/dominant-strategies/go-quai/ethdb" 35 "github.com/dominant-strategies/go-quai/event" 36 "github.com/dominant-strategies/go-quai/log" 37 "github.com/dominant-strategies/go-quai/metrics" 38 ) 39 40 var ( 41 MaxBlockFetch = 128 // Amount of blocks to be fetched per retrieval request 42 MaxHeaderFetch = 192 // Amount of block headers to be fetched per retrieval request 43 MaxSkeletonWindow = 1024 // Amount of blocks to be fetched for a skeleton assembly. 
44 MaxSkeletonSize = 1024 // Number of header fetches to need for a skeleton assembly 45 MaxStateFetch = 384 // Amount of node state values to allow fetching per request 46 47 PrimeSkeletonDist = 8 48 PrimeFetchDepth = 1000 49 RegionFetchDepth = 7000 50 ZoneFetchDepth = 21000 51 52 maxQueuedHeaders = 32 * 1024 // [eth/62] Maximum number of headers to queue for import (DOS protection) 53 maxHeadersProcess = 2048 // Number of header download results to import at once into the chain 54 55 fsHeaderContCheck = 3 * time.Second // Time interval to check for header continuations during state download 56 ) 57 58 var ( 59 errBusy = errors.New("busy") 60 errUnknownPeer = errors.New("peer is unknown or unhealthy") 61 errBadPeer = errors.New("action from bad peer ignored") 62 errStallingPeer = errors.New("peer is stalling") 63 errUnsyncedPeer = errors.New("unsynced peer") 64 errNoPeers = errors.New("no peers to keep download active") 65 errTimeout = errors.New("timeout") 66 errEmptyHeaderSet = errors.New("empty header set by peer") 67 errPeersUnavailable = errors.New("no peers available or all tried for download") 68 errInvalidAncestor = errors.New("retrieved ancestor is invalid") 69 errInvalidChain = errors.New("retrieved hash chain is invalid") 70 errInvalidBody = errors.New("retrieved block body is invalid") 71 errCancelContentProcessing = errors.New("content processing canceled (requested)") 72 errBadBlockFound = errors.New("peer sent a bad block") 73 errCanceled = errors.New("syncing canceled (requested)") 74 errNoSyncActive = errors.New("no sync active") 75 errTooOld = errors.New("peer's protocol version too old") 76 ) 77 78 type Downloader struct { 79 mode uint32 // Synchronisation mode defining the strategy used (per sync cycle), use d.getMode() to get the SyncMode 80 mux *event.TypeMux // Event multiplexer to announce sync operation events 81 82 queue *queue // Scheduler for selecting the hashes to download 83 peers *peerSet // Set of active peers from which download can proceed 84 85 stateDB ethdb.Database // Database to state sync into (and deduplicate via) 86 87 // Statistics 88 syncStatsChainOrigin uint64 // Origin block number where syncing started at 89 syncStatsChainHeight uint64 // Highest block number known when syncing started 90 syncStatsLock sync.RWMutex // Lock protecting the sync stats fields 91 92 core Core 93 94 headEntropy *big.Int 95 headNumber uint64 96 97 // Callbacks 98 dropPeer peerDropFn // Drops a peer for misbehaving 99 100 // Status 101 synchroniseMock func(id string, hash common.Hash) error // Replacement for synchronise during testing 102 synchronising int32 103 notified int32 104 committed int32 105 106 // Channels 107 headerCh chan dataPack // Channel receiving inbound block headers 108 bodyCh chan dataPack // Channel receiving inbound block bodies 109 bodyWakeCh chan bool // Channel to signal the block body fetcher of new tasks 110 headerProcCh chan []*types.Header // Channel to feed the header processor new tasks 111 112 // Cancellation and termination 113 cancelPeer string // Identifier of the peer currently being used as the master (cancel on drop) 114 cancelCh chan struct{} // Channel to cancel mid-flight syncs 115 cancelLock sync.RWMutex // Lock to protect the cancel channel and peer in delivers 116 cancelWg sync.WaitGroup // Make sure all fetcher goroutines have exited. 
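// Note: cancelCh above is re-created by synchronise for every sync cycle, while quitCh
// below is closed exactly once by Terminate and permanently shuts the downloader down.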
117 118 quitCh chan struct{} // Quit channel to signal termination 119 quitLock sync.Mutex // Lock to prevent double closes 120 121 // Testing hooks 122 syncInitHook func(uint64, uint64) // Method to call upon initiating a new sync run 123 bodyFetchHook func([]*types.Header) // Method to call upon starting a block body fetch 124 chainInsertHook func([]*fetchResult) // Method to call upon inserting a chain of blocks (possibly in multiple invocations) 125 } 126 127 // Core encapsulates functions required to sync a full core. 128 type Core interface { 129 // HasBlock verifies a block's presence in the local chain. 130 HasBlock(common.Hash, uint64) bool 131 132 // GetBlockByHash retrieves a block from the local chain. 133 GetBlockByHash(common.Hash) *types.Block 134 135 // GetBlockByNumber retrieves a block from the local chain. 136 GetBlockByNumber(uint64) *types.Block 137 138 // CurrentHeader retrieves the head of the local chain. 139 CurrentHeader() *types.Header 140 141 // CurrentLogEntropy returns the logarithm of the total entropy reduction since genesis for our current head block 142 CurrentLogEntropy() *big.Int 143 144 // TotalLogS returns the total entropy reduction of the chain from genesis to the given header 145 TotalLogS(header *types.Header) *big.Int 146 147 // AddPendingEtxs adds the pendingEtxs to the database. 148 AddPendingEtxs(pendingEtxs types.PendingEtxs) error 149 150 // Snapshots returns the core snapshot tree so it can be paused during sync. 151 Snapshots() *snapshot.Tree 152 153 // Engine returns the consensus engine. 154 Engine() consensus.Engine 155 156 // WriteBlock writes the block to the database 157 WriteBlock(block *types.Block) 158 159 // GetTerminiByHash returns the termini of a given block 160 GetTerminiByHash(hash common.Hash) *types.Termini 161 162 // BadHashExistsInChain returns true if any of the specified bad hashes exists on chain 163 BadHashExistsInChain() bool 164 165 // IsBlockHashABadHash returns true if block hash exists in the bad hashes list 166 IsBlockHashABadHash(hash common.Hash) bool 167 } 168 169 // New creates a new downloader to fetch hashes and blocks from remote peers. 170 func New(mux *event.TypeMux, core Core, dropPeer peerDropFn) *Downloader { 171 dl := &Downloader{ 172 mux: mux, 173 queue: newQueue(blockCacheMaxItems, blockCacheInitialItems), 174 peers: newPeerSet(), 175 core: core, 176 headNumber: core.CurrentHeader().NumberU64(), 177 headEntropy: core.CurrentLogEntropy(), 178 dropPeer: dropPeer, 179 headerCh: make(chan dataPack, 1), 180 bodyCh: make(chan dataPack, 1), 181 bodyWakeCh: make(chan bool, 1), 182 headerProcCh: make(chan []*types.Header, 10), 183 quitCh: make(chan struct{}), 184 } 185 186 return dl 187 } 188 189 // Progress retrieves the synchronisation boundaries, specifically the origin 190 // block where synchronisation started at (may have failed/suspended); the block 191 // or header sync is currently at; and the latest known block which the sync targets. 192 // 193 // In addition, during the state download phase of fast synchronisation the number 194 // of processed and the total number of known states are also returned. Otherwise 195 // these are zero.
196 func (d *Downloader) Progress() quai.SyncProgress { 197 // Lock the current stats and return the progress 198 d.syncStatsLock.RLock() 199 defer d.syncStatsLock.RUnlock() 200 201 current := uint64(0) 202 mode := d.getMode() 203 switch { 204 case d.core != nil && mode == FullSync: 205 current = d.core.CurrentHeader().NumberU64() 206 default: 207 log.Error("Unknown downloader chain/mode combo", "core", d.core != nil, "mode", mode) 208 } 209 return quai.SyncProgress{ 210 StartingBlock: d.syncStatsChainOrigin, 211 CurrentBlock: current, 212 HighestBlock: d.syncStatsChainHeight, 213 } 214 } 215 216 // Synchronising returns whether the downloader is currently retrieving blocks. 217 func (d *Downloader) Synchronising() bool { 218 return atomic.LoadInt32(&d.synchronising) > 0 219 } 220 221 // RegisterPeer injects a new download peer into the set of block sources to be 222 // used for fetching hashes and blocks from. 223 func (d *Downloader) RegisterPeer(id string, version uint, peer Peer) error { 224 logger := log.Log 225 logger.Trace("Registering sync peer") 226 if err := d.peers.Register(newPeerConnection(id, version, peer, logger)); err != nil { 227 logger.Error("Failed to register sync peer", "err", err) 228 return err 229 } 230 return nil 231 } 232 233 // HeadEntropy returns the downloader head entropy 234 func (d *Downloader) HeadEntropy() *big.Int { 235 return d.headEntropy 236 } 237 238 // UnregisterPeer removes a peer from the known list, preventing any action from 239 // the specified peer. An effort is also made to return any pending fetches into 240 // the queue. 241 func (d *Downloader) UnregisterPeer(id string) error { 242 // Unregister the peer from the active peer set and revoke any fetch tasks 243 logger := log.Log 244 logger.Trace("Unregistering sync peer") 245 if err := d.peers.Unregister(id); err != nil { 246 logger.Error("Failed to unregister sync peer", "err", err) 247 return err 248 } 249 d.queue.Revoke(id) 250 251 return nil 252 } 253 254 // Synchronise tries to sync up our local block chain with a remote peer, both 255 // adding various sanity checks as well as wrapping it with various log entries. 256 func (d *Downloader) Synchronise(id string, head common.Hash, entropy *big.Int, mode SyncMode) error { 257 err := d.synchronise(id, head, entropy, mode) 258 switch err { 259 case nil, errBusy, errCanceled, errNoFetchesPending: 260 return err 261 } 262 if errors.Is(err, errInvalidChain) || errors.Is(err, errBadPeer) || errors.Is(err, errTimeout) || 263 errors.Is(err, errStallingPeer) || errors.Is(err, errUnsyncedPeer) || errors.Is(err, errEmptyHeaderSet) || 264 errors.Is(err, errPeersUnavailable) || errors.Is(err, errTooOld) || errors.Is(err, errInvalidAncestor) || errors.Is(err, errBadBlockFound) { 265 log.Warn("Synchronisation failed, dropping peer", "peer", id, "err", err) 266 if d.dropPeer == nil { 267 // The dropPeer method is nil when `--copydb` is used for a local copy. 268 // Timeouts can occur if e.g. compaction hits at the wrong time, and can be ignored 269 log.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", id) 270 } else { 271 d.dropPeer(id) 272 } 273 return err 274 } 275 log.Warn("Synchronisation failed, retrying", "err", err) 276 return err 277 } 278 279 // PeerSet retrieves the current peer set of the downloader. 280 func (d *Downloader) PeerSet() *peerSet { 281 return d.peers 282 } 283 284 // synchronise will select the peer and use it for synchronising.
The sync only proceeds if the 285 // peer's advertised entropy is higher than our local head entropy. If any of the 286 // checks fail an error will be returned. This method is synchronous. 287 func (d *Downloader) synchronise(id string, hash common.Hash, entropy *big.Int, mode SyncMode) error { 288 // Mock out the synchronisation if testing 289 if d.synchroniseMock != nil { 290 return d.synchroniseMock(id, hash) 291 } 292 // Make sure only one goroutine is ever allowed past this point at once 293 if !atomic.CompareAndSwapInt32(&d.synchronising, 0, 1) { 294 return errBusy 295 } 296 defer atomic.StoreInt32(&d.synchronising, 0) 297 298 // Post a user notification of the sync (only once per session) 299 if atomic.CompareAndSwapInt32(&d.notified, 0, 1) { 300 log.Info("Block synchronisation started") 301 } 302 303 // Reset the queue, peer set and wake channels to clean any internal leftover state 304 d.queue.Reset(blockCacheMaxItems, blockCacheInitialItems) 305 d.peers.Reset() 306 307 for _, ch := range []chan bool{d.bodyWakeCh} { 308 select { 309 case <-ch: 310 default: 311 } 312 } 313 for _, ch := range []chan dataPack{d.headerCh, d.bodyCh} { 314 for empty := false; !empty; { 315 select { 316 case <-ch: 317 default: 318 empty = true 319 } 320 } 321 } 322 for empty := false; !empty; { 323 select { 324 case <-d.headerProcCh: 325 default: 326 empty = true 327 } 328 } 329 // Create cancel channel for aborting mid-flight and mark the master peer 330 d.cancelLock.Lock() 331 d.cancelCh = make(chan struct{}) 332 d.cancelPeer = id 333 d.cancelLock.Unlock() 334 335 defer d.Cancel() // No matter what, we can't leave the cancel channel open 336 337 // Atomically set the requested sync mode 338 atomic.StoreUint32(&d.mode, uint32(mode)) 339 340 // Retrieve the origin peer and initiate the downloading process 341 p := d.peers.Peer(id) 342 if p == nil { 343 return errUnknownPeer 344 } 345 346 // If the peer entropy is not higher than our head entropy, there is nothing to sync 347 if d.headEntropy.Cmp(entropy) >= 0 { 348 return nil 349 } 350 351 // Only start the downloader after we reset from a forked state 352 if d.core.BadHashExistsInChain() { 353 log.Warn("Bad Hashes still exist on chain, cannot start the downloader yet") 354 return nil 355 } 356 return d.syncWithPeer(p, hash, entropy) 357 } 358 359 func (d *Downloader) getMode() SyncMode { 360 return SyncMode(atomic.LoadUint32(&d.mode)) 361 } 362 363 // syncWithPeer starts a block synchronisation based on the hash chain from the 364 // specified peer and head hash. 365 func (d *Downloader) syncWithPeer(p *peerConnection, hash common.Hash, entropy *big.Int) (err error) { 366 d.mux.Post(StartEvent{}) 367 defer func() { 368 // reset on error 369 if err != nil { 370 d.mux.Post(FailedEvent{err}) 371 } else { 372 latest := d.core.CurrentHeader() 373 d.mux.Post(DoneEvent{latest}) 374 } 375 }() 376 if p.version < eth.QUAI1 { 377 return fmt.Errorf("%w: advertised %d < required %d", errTooOld, p.version, eth.QUAI1) 378 } 379 mode := d.getMode() 380 381 log.Info("Synchronising with the network", "peer", p.id, "eth", p.version, "head", hash, "entropy", entropy, "mode", mode) 382 defer func(start time.Time) { 383 log.Debug("Synchronisation terminated", "elapsed", common.PrettyDuration(time.Since(start))) 384 }(time.Now()) 385 386 // Get the latest head of the peer to start the sync from.
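// Its number becomes the fetch target for the whole cycle: fetchHeaders walks the skeleton
// up to it and processFullSyncContent stops once the imported head reaches it.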
387 latest, err := d.fetchHead(p) 388 if err != nil { 389 return err 390 } 391 392 // Height of the peer 393 peerHeight := latest.Number().Uint64() 394 origin := peerHeight 395 396 // TODO: display the correct sync stats 397 d.syncStatsLock.Lock() 398 if d.syncStatsChainHeight <= origin || d.syncStatsChainOrigin > origin { 399 d.syncStatsChainOrigin = origin 400 } 401 d.syncStatsChainHeight = peerHeight 402 d.syncStatsLock.Unlock() 403 404 d.committed = 1 405 406 // Initiate the sync using a concurrent header and content retrieval algorithm 407 if d.syncInitHook != nil { 408 d.syncInitHook(origin, peerHeight) 409 } 410 fetchers := []func() error{ 411 func() error { return d.fetchHeaders(p, origin) }, // Headers are always retrieved 412 func() error { return d.fetchBodies(origin) }, // Bodies are always retrieved in full sync 413 func() error { return d.processHeaders(origin) }, 414 func() error { return d.processFullSyncContent(peerHeight) }, 415 } 416 return d.spawnSync(fetchers) 417 } 418 419 // spawnSync runs all given fetcher functions to completion in 420 // separate goroutines, returning the first error that appears. 421 func (d *Downloader) spawnSync(fetchers []func() error) error { 422 errc := make(chan error, len(fetchers)) 423 d.cancelWg.Add(len(fetchers)) 424 for _, fn := range fetchers { 425 fn := fn 426 go func() { defer d.cancelWg.Done(); errc <- fn() }() 427 } 428 // Wait for the first error, then terminate the others. 429 var err error 430 for i := 0; i < len(fetchers); i++ { 431 if i == len(fetchers)-1 { 432 // Close the queue when all fetchers have exited. 433 // This will cause the block processor to end when 434 // it has processed the queue. 435 d.queue.Close() 436 } 437 if err = <-errc; err != nil && err != errCanceled { 438 break 439 } 440 } 441 d.queue.Close() 442 d.Cancel() 443 return err 444 } 445 446 // cancel aborts all of the operations by closing the current cancel channel. However, cancel does 447 // not wait for the running download goroutines to finish. This method should be 448 // used when cancelling the downloads from inside the downloader. 449 func (d *Downloader) cancel() { 450 // Close the current cancel channel 451 d.cancelLock.Lock() 452 defer d.cancelLock.Unlock() 453 454 if d.cancelCh != nil { 455 select { 456 case <-d.cancelCh: 457 // Channel was already closed 458 default: 459 close(d.cancelCh) 460 } 461 } 462 } 463 464 // Cancel aborts all of the operations and waits for all download goroutines to 465 // finish before returning. 466 func (d *Downloader) Cancel() { 467 d.cancel() 468 d.cancelWg.Wait() 469 } 470 471 // Terminate interrupts the downloader, canceling all pending operations. 472 // The downloader cannot be reused after calling Terminate. 473 func (d *Downloader) Terminate() { 474 // Close the termination channel (make sure double close is allowed) 475 d.quitLock.Lock() 476 select { 477 case <-d.quitCh: 478 default: 479 close(d.quitCh) 480 } 481 482 d.quitLock.Unlock() 483 484 // Cancel any pending download requests 485 d.Cancel() 486 } 487 488 // fetchHead retrieves the head header from a remote peer.
489 func (d *Downloader) fetchHead(p *peerConnection) (head *types.Header, err error) { 490 p.log.Debug("Retrieving remote chain head") 491 492 // Request the advertised remote head block and wait for the response 493 latest, _, _, _ := p.peer.Head() 494 fetch := 1 495 go p.peer.RequestHeadersByHash(latest, fetch, uint64(1), false, true) 496 497 ttl := d.peers.rates.TargetTimeout() 498 timeout := time.After(ttl) 499 for { 500 select { 501 case <-d.cancelCh: 502 return nil, errCanceled 503 504 case packet := <-d.headerCh: 505 // Discard anything not from the origin peer 506 if packet.PeerId() != p.id { 507 log.Debug("Received headers from incorrect peer", "peer", packet.PeerId()) 508 break 509 } 510 // Make sure the peer gave us at least one and at most the requested headers 511 headers := packet.(*headerPack).headers 512 if len(headers) == 0 || len(headers) > fetch { 513 return nil, fmt.Errorf("%w: returned headers %d != requested %d", errBadPeer, len(headers), fetch) 514 } 515 // The first (and only) header returned is taken to be the remote 516 // chain head. 517 head := headers[0] 518 if len(headers) == 1 { 519 p.log.Debug("Remote head identified", "number", head.Number(), "hash", head.Hash()) 520 return head, nil 521 } 522 return head, nil 523 524 case <-timeout: 525 p.log.Debug("Waiting for head header timed out", "elapsed", ttl) 526 return nil, errTimeout 527 528 case <-d.bodyCh: 529 } 530 } 531 } 532 533 // fetchHeaders keeps retrieving headers concurrently from the number 534 // requested, until no more are returned, potentially throttling on the way. To 535 // facilitate concurrency but still protect against malicious nodes sending bad 536 // headers, we construct a header chain skeleton using the "origin" peer we are 537 // syncing with, and fill in the missing headers using anyone else. Headers from 538 // other peers are only accepted if they map cleanly to the skeleton. If no one 539 // can fill in the skeleton - not even the origin peer - it's assumed invalid and 540 // the origin is dropped. 541 func (d *Downloader) fetchHeaders(p *peerConnection, from uint64) error { 542 p.log.Debug("Directing header downloads", "origin", from) 543 defer p.log.Debug("Header download terminated") 544 545 // Create a timeout timer, and the associated header fetcher 546 skeleton := true 547 skeletonHeaders := make([]*types.Header, 0) 548 request := time.Now() // time of the last skeleton fetch request 549 timeout := time.NewTimer(0) // timer to dump a non-responsive active peer 550 <-timeout.C // timeout channel should be initially empty 551 defer timeout.Stop() 552 553 // peer height 554 peerHeight := from 555 nodeCtx := common.NodeLocation.Context() 556 557 localHeight := d.headNumber 558 559 // updateFetchPoint advances the fetch point to at most MaxSkeletonWindow blocks 560 // past the local height, capped at the peer height. 561 updateFetchPoint := func() { 562 if localHeight+uint64(MaxSkeletonWindow) < peerHeight { 563 from = localHeight + uint64(MaxSkeletonWindow) 564 } else { 565 from = peerHeight 566 } 567 } 568 569 var ttl time.Duration 570 getHeaders := func(from uint64, to uint64) { 571 request = time.Now() 572 573 if skeleton { 574 timeout.Reset(1 * time.Minute) 575 } else { 576 ttl = d.peers.rates.TargetTimeout() 577 timeout.Reset(ttl) 578 } 579 580 if skeleton { 581 // Reset the skeleton headers each time we try to fetch the skeleton.
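// The prime skeleton request passes PrimeSkeletonDist where region and zone pass 1, and every
// request is bounded by the caller-supplied `to` (see the *FetchDepth tails computed below).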
582 skeletonHeaders = make([]*types.Header, 0) 583 p.log.Trace("Fetching skeleton headers", "count", MaxSkeletonSize, "from", from) 584 if nodeCtx == common.PRIME_CTX { 585 go p.peer.RequestHeadersByNumber(from, MaxSkeletonSize, uint64(PrimeSkeletonDist), to, false, true) 586 } else { 587 go p.peer.RequestHeadersByNumber(from, MaxSkeletonSize, uint64(1), to, true, true) 588 } 589 } else { 590 p.log.Trace("Fetching full headers", "count", MaxHeaderFetch, "from", from) 591 go p.peer.RequestHeadersByNumber(from, MaxHeaderFetch, uint64(1), to, false, true) 592 } 593 } 594 595 // In the case of prime there is no guarantee that during the backward sync 596 // the prime blocks will match. To be tolerant to reorgs and forks, we need 597 // to fetch down to a certain depth, so the tail of the fetch reaches back 598 // PrimeFetchDepth blocks below our local height. 599 updateFetchPoint() 600 if nodeCtx == common.PRIME_CTX { 601 if localHeight > uint64(PrimeFetchDepth) { 602 getHeaders(from, localHeight-uint64(PrimeFetchDepth)) 603 } else { 604 getHeaders(from, 0) 605 } 606 } else if nodeCtx == common.REGION_CTX { 607 if localHeight > uint64(RegionFetchDepth) { 608 getHeaders(from, localHeight-uint64(RegionFetchDepth)) 609 } else { 610 getHeaders(from, 0) 611 } 612 } else { 613 if localHeight > uint64(ZoneFetchDepth) { 614 getHeaders(from, localHeight-uint64(ZoneFetchDepth)) 615 } else { 616 getHeaders(from, 0) 617 } 618 } 619 620 first := true 621 622 for { 623 select { 624 case <-d.cancelCh: 625 return errCanceled 626 627 case packet := <-d.headerCh: 628 629 // Make sure the active peer is giving us the skeleton headers 630 if packet.PeerId() != p.id { 631 log.Debug("Received skeleton from incorrect peer", "peer", packet.PeerId()) 632 break 633 } 634 headerReqTimer.UpdateSince(request) 635 timeout.Stop() 636 637 headers := packet.(*headerPack).headers 638 639 if skeleton { 640 // Only keep skeleton headers until we reach one we already know about (the common ancestor). 641 for i := 0; i < len(headers); i++ { 642 skeletonHeaders = append(skeletonHeaders, headers[i]) 643 commonAncestor := d.core.HasBlock(headers[i].Hash(), headers[i].NumberU64()) && (d.core.GetTerminiByHash(headers[i].Hash()) != nil) 644 if commonAncestor { 645 break 646 } 647 } 648 } 649 // If the oldest skeleton header is already within a few blocks of genesis, anchor the skeleton at the genesis block. 650 if len(skeletonHeaders) > 0 && skeletonHeaders[len(skeletonHeaders)-1].NumberU64() < 8 { 651 genesisBlock := d.core.GetBlockByNumber(0) 652 skeletonHeaders = append(skeletonHeaders, genesisBlock.Header()) 653 } 654 655 if len(headers) == 0 { 656 continue 657 } 658 659 // Prepare the resultStore to fill the skeleton. 660 // The first flag ensures the offset is only set on the first skeleton fetch. 661 if len(skeletonHeaders) > 0 && first { 662 d.queue.Prepare(skeletonHeaders[len(skeletonHeaders)-1].NumberU64(), FullSync) 663 first = false 664 } 665 666 // If the skeleton's finished, pull any remaining head headers directly from the origin peer. 667 // When the length of skeletonHeaders is zero or one, there is no more skeleton to fetch. 668 // If we are at the tail, fetch directly from the peer height.
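// For example, once only the common ancestor is left in skeletonHeaders, skeleton mode is
// switched off and headers are requested directly from peerHeight down to that ancestor's number.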
669 if skeleton && len(skeletonHeaders) == 1 { 670 skeleton = false 671 // Get the headers directly from the peer height 672 getHeaders(peerHeight, skeletonHeaders[0].NumberU64()) 673 continue 674 } 675 676 var progressed bool 677 // If we received a skeleton batch, resolve internals concurrently 678 if skeleton { 679 // trim on common, set origin to last entry in the skeleton 680 filled, proced, err := d.fillHeaderSkeleton(from, skeletonHeaders) 681 if err != nil { 682 p.log.Info("Skeleton chain invalid", "err", err) 683 return fmt.Errorf("%w: %v", errInvalidChain, err) 684 } 685 headers = filled[proced:] 686 localHeight = skeletonHeaders[0].NumberU64() 687 688 progressed = proced > 0 689 updateFetchPoint() 690 getHeaders(from, localHeight) 691 } 692 693 // Insert all the new headers and fetch the next batch 694 // This is only used for getting the tail for prime, region and zone. 695 if len(headers) > 0 && !skeleton { 696 p.log.Trace("Scheduling new headers", "count", len(headers), "from", from) 697 select { 698 case d.headerProcCh <- headers: 699 case <-d.cancelCh: 700 return errCanceled 701 } 702 703 if localHeight >= peerHeight { 704 continue 705 } 706 } 707 708 if len(headers) == 0 && !progressed { 709 // No headers delivered, or all of them being delayed, sleep a bit and retry 710 p.log.Trace("All headers delayed, waiting") 711 select { 712 case <-time.After(fsHeaderContCheck): 713 updateFetchPoint() 714 getHeaders(from, localHeight) 715 continue 716 case <-d.cancelCh: 717 return errCanceled 718 } 719 } 720 721 case <-timeout.C: 722 if d.dropPeer == nil { 723 // The dropPeer method is nil when `--copydb` is used for a local copy. 724 // Timeouts can occur if e.g. compaction hits at the wrong time, and can be ignored 725 p.log.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", p.id) 726 break 727 } 728 // Header retrieval timed out, consider the peer bad and drop 729 p.log.Debug("Header request timed out", "elapsed", ttl) 730 headerTimeoutMeter.Mark(1) 731 d.dropPeer(p.id) 732 733 // Finish the sync gracefully instead of dumping the gathered data though 734 for _, ch := range []chan bool{d.bodyWakeCh} { 735 select { 736 case ch <- false: 737 case <-d.cancelCh: 738 } 739 } 740 select { 741 case d.headerProcCh <- nil: 742 case <-d.cancelCh: 743 } 744 return fmt.Errorf("%w: header request timed out", errBadPeer) 745 } 746 } 747 } 748 749 // fillHeaderSkeleton concurrently retrieves headers from all our available peers 750 // and maps them to the provided skeleton header chain. 751 // 752 // Any partial results from the beginning of the skeleton are (if possible) forwarded 753 // immediately to the header processor to keep the rest of the pipeline full even 754 // in the case of header stalls. 755 // 756 // The method returns the entire filled skeleton and also the number of headers 757 // already forwarded for processing.
758 func (d *Downloader) fillHeaderSkeleton(from uint64, skeleton []*types.Header) ([]*types.Header, int, error) { 759 log.Debug("Filling up skeleton", "from", from) 760 d.queue.ScheduleSkeleton(from, skeleton) 761 762 var ( 763 deliver = func(packet dataPack) (int, error) { 764 pack := packet.(*headerPack) 765 return d.queue.DeliverHeaders(pack.peerID, pack.headers, d.headerProcCh) 766 } 767 expire = func() map[string]int { return d.queue.ExpireHeaders(d.peers.rates.TargetTimeout()) } 768 reserve = func(p *peerConnection, count int) (*fetchRequest, bool, bool) { 769 return d.queue.ReserveHeaders(p, count), false, false 770 } 771 fetch = func(p *peerConnection, req *fetchRequest) error { 772 return p.FetchHeaders(req.From, int(req.From-req.To)) 773 } 774 capacity = func(p *peerConnection) int { return p.HeaderCapacity(d.peers.rates.TargetRoundTrip()) } 775 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { 776 p.SetHeadersIdle(accepted, deliveryTime) 777 } 778 ) 779 err := d.fetchParts(d.headerCh, deliver, d.queue.headerContCh, expire, 780 d.queue.PendingHeaders, d.queue.InFlightHeaders, reserve, 781 nil, fetch, d.queue.CancelHeaders, capacity, d.peers.HeaderIdlePeers, setIdle, "headers") 782 783 log.Debug("Skeleton fill terminated", "err", err) 784 785 filled, proced := d.queue.RetrieveHeaders() 786 return filled, proced, err 787 } 788 789 // fetchBodies iteratively downloads the scheduled block bodies, taking any 790 // available peers, reserving a chunk of blocks for each, waiting for delivery 791 // and also periodically checking for timeouts. 792 func (d *Downloader) fetchBodies(from uint64) error { 793 log.Debug("Downloading block bodies", "origin", from) 794 795 var ( 796 deliver = func(packet dataPack) (int, error) { 797 pack := packet.(*bodyPack) 798 return d.queue.DeliverBodies(pack.peerID, pack.transactions, pack.uncles, pack.extTransactions, pack.manifest) 799 } 800 expire = func() map[string]int { return d.queue.ExpireBodies(d.peers.rates.TargetTimeout()) } 801 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchBodies(req) } 802 capacity = func(p *peerConnection) int { return p.BlockCapacity(d.peers.rates.TargetRoundTrip()) } 803 setIdle = func(p *peerConnection, accepted int, deliveryTime time.Time) { p.SetBodiesIdle(accepted, deliveryTime) } 804 ) 805 err := d.fetchParts(d.bodyCh, deliver, d.bodyWakeCh, expire, 806 d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ReserveBodies, 807 d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, d.peers.BodyIdlePeers, setIdle, "bodies") 808 809 log.Debug("Block body download terminated", "err", err) 810 return err 811 } 812 813 // fetchParts iteratively downloads scheduled block parts, taking any available 814 // peers, reserving a chunk of fetch requests for each, waiting for delivery and 815 // also periodically checking for timeouts. 816 // 817 // As the scheduling/timeout logic mostly is the same for all downloaded data 818 // types, this method is used by each for data gathering and is instrumented with 819 // various callbacks to handle the slight differences between processing them. 
820 // 821 // The instrumentation parameters: 823 // - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) 824 // - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) 825 // - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) 826 // - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) 827 // - pending: task callback for the number of requests still needing download (detect completion/non-completability) 828 // - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) 830 // - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) 831 // - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) 832 // - fetch: network callback to actually send a particular download request to a physical remote peer 833 // - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) 834 // - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) 835 // - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks 836 // - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) 837 // - kind: textual label of the type being downloaded to display in log messages 838 func (d *Downloader) fetchParts(deliveryCh chan dataPack, deliver func(dataPack) (int, error), wakeCh chan bool, 839 expire func() map[string]int, pending func() int, inFlight func() bool, reserve func(*peerConnection, int) (*fetchRequest, bool, bool), 840 fetchHook func([]*types.Header), fetch func(*peerConnection, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peerConnection) int, 841 idle func() ([]*peerConnection, int), setIdle func(*peerConnection, int, time.Time), kind string) error { 842 843 // Create a ticker to detect expired retrieval tasks 844 ticker := time.NewTicker(100 * time.Millisecond) 845 defer ticker.Stop() 846 847 update := make(chan struct{}, 1) 848 849 // Prepare the queue and fetch block parts until the block header fetcher's done 850 finished := false 851 for { 852 select { 853 case <-d.cancelCh: 854 return errCanceled 855 856 case packet := <-deliveryCh: 857 deliveryTime := time.Now() 858 // If the peer was previously banned and failed to deliver its pack 859 // in a reasonable time frame, ignore its message. 860 if peer := d.peers.Peer(packet.PeerId()); peer != nil { 861 // Deliver the received chunk of data and check chain validity 862 accepted, err := deliver(packet) 863 if errors.Is(err, errInvalidChain) { 864 return err 865 } 866 // Unless a peer delivered something completely different from what was requested (usually 867 // caused by a timed out request which came through in the end), set it to 868 // idle. If the delivery's stale, the peer should have already been idled.
869 if !errors.Is(err, errStaleDelivery) { 870 setIdle(peer, accepted, deliveryTime) 871 } 872 // Issue a log to the user to see what's going on 873 switch { 874 case err == nil && packet.Items() == 0: 875 peer.log.Trace("Requested data not delivered", "type", kind) 876 case err == nil: 877 peer.log.Trace("Delivered new batch of data", "type", kind, "count", packet.Stats()) 878 default: 879 peer.log.Debug("Failed to deliver retrieved data", "type", kind, "err", err) 880 } 881 } 882 // Blocks assembled, try to update the progress 883 select { 884 case update <- struct{}{}: 885 default: 886 } 887 888 case cont := <-wakeCh: 889 // The header fetcher sent a continuation flag, check if it's done 890 if !cont { 891 finished = true 892 } 893 // Headers arrive, try to update the progress 894 select { 895 case update <- struct{}{}: 896 default: 897 } 898 899 case <-ticker.C: 900 // Sanity check: periodically try to update the progress 901 select { 902 case update <- struct{}{}: 903 default: 904 } 905 906 case <-update: 907 // Short circuit if we lost all our peers 908 if d.peers.Len() == 0 { 909 return errNoPeers 910 } 911 // Check for fetch request timeouts and demote the responsible peers 912 for pid, fails := range expire() { 913 if peer := d.peers.Peer(pid); peer != nil { 914 // If a lot of retrieval elements expired, we might have overestimated the remote peer or perhaps 915 // ourselves. Only reset to minimal throughput, but don't drop the peer just yet. If even the 916 // minimal request times out, the peer is stalling and we need to get rid of it. 917 // 918 // The reason the minimum threshold is 2 is that the downloader tries to estimate the bandwidth 919 // and latency of a peer separately, which requires pushing the measured capacity a bit and seeing 920 // how response times react to it, so it always requests one more than the minimum (i.e. min 2). 921 if fails > 2 { 922 peer.log.Trace("Data delivery timed out", "type", kind) 923 setIdle(peer, 0, time.Now()) 924 } else { 925 peer.log.Debug("Stalling delivery, dropping", "type", kind) 926 927 if d.dropPeer == nil { 928 // The dropPeer method is nil when `--copydb` is used for a local copy. 929 // Timeouts can occur if e.g. compaction hits at the wrong time, and can be ignored 930 peer.log.Warn("Downloader wants to drop peer, but peerdrop-function is not set", "peer", pid) 931 } else { 932 d.dropPeer(pid) 933 934 // If this peer was the master peer, abort sync immediately 935 d.cancelLock.RLock() 936 master := pid == d.cancelPeer 937 d.cancelLock.RUnlock() 938 939 if master { 940 d.cancel() 941 return errTimeout 942 } 943 } 944 } 945 } 946 } 947 // If there's nothing more to fetch, wait or terminate 948 if pending() == 0 { 949 if !inFlight() && finished { 950 log.Debug("Data fetching completed", "type", kind) 951 return nil 952 } 953 break 954 } 955 // Send a download request to all idle peers, until throttled 956 progressed, throttled, running := false, false, inFlight() 957 idles, total := idle() 958 pendCount := pending() 959 for _, peer := range idles { 960 // Short circuit if throttling activated 961 if throttled { 962 break 963 } 964 // Short circuit if there are no more available tasks. 965 if pendCount = pending(); pendCount == 0 { 966 break 967 } 968 // Reserve a chunk of fetches for a peer. A nil can mean either that 969 // no more headers are available, or that the peer is known not to 970 // have them.
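// `progress` keeps the check below from returning errPeersUnavailable even when no new request
// could be built, and `throttle` stops assigning work to further peers in this round.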
971 request, progress, throttle := reserve(peer, capacity(peer)) 972 if progress { 973 progressed = true 974 } 975 if throttle { 976 throttled = true 977 throttleCounter.Inc(1) 978 } 979 if request == nil { 980 continue 981 } 982 if request.From > 0 { 983 peer.log.Trace("Requesting new batch of data", "type", kind, "from", request.From) 984 } else { 985 if len(request.Headers) != 0 { 986 peer.log.Trace("Requesting new batch of data", "type", kind, "count", len(request.Headers), "from", request.Headers[0].Number()) 987 } 988 } 989 // Fetch the chunk and make sure any errors return the hashes to the queue 990 if fetchHook != nil { 991 fetchHook(request.Headers) 992 } 993 if err := fetch(peer, request); err != nil { 994 // Although we could try and make an attempt to fix this, this error really 995 // means that we've double allocated a fetch task to a peer. If that is the 996 // case, the internal state of the downloader and the queue is very wrong so 997 // better hard crash and note the error instead of silently accumulating into 998 // a much bigger issue. 999 panic(fmt.Sprintf("%v: %s fetch assignment failed", peer, kind)) 1000 } 1001 running = true 1002 } 1003 // Make sure that we have peers available for fetching. If all peers have been tried 1004 // and all failed, throw an error 1005 if !progressed && !throttled && !running && len(idles) == total && pendCount > 0 { 1006 return errPeersUnavailable 1007 } 1008 } 1009 } 1010 } 1011 1012 // processHeaders takes batches of retrieved headers from an input channel and 1013 // keeps processing and scheduling them into the header chain and downloader's 1014 // queue until the stream ends or a failure occurs. 1015 func (d *Downloader) processHeaders(origin uint64) error { 1016 // Keep a count of uncertain headers to roll back 1017 var ( 1018 rollback uint64 // Zero means no rollback (fine as you can't unroll the genesis) 1019 rollbackErr error 1020 mode = d.getMode() 1021 ) 1022 defer func() { 1023 if rollback > 0 { 1024 curBlock := d.core.CurrentHeader().NumberU64() 1025 log.Warn("Rolled back chain segment", 1026 "block", fmt.Sprintf("%d->%d", rollback, curBlock), "reason", rollbackErr) 1027 } 1028 }() 1029 // Wait for batches of headers to process 1030 1031 for { 1032 select { 1033 case <-d.cancelCh: 1034 rollbackErr = errCanceled 1035 return errCanceled 1036 1037 case headers := <-d.headerProcCh: 1038 // Terminate header processing if we synced up 1039 if len(headers) == 0 { 1040 // Notify everyone that headers are fully processed 1041 for _, ch := range []chan bool{d.bodyWakeCh} { 1042 select { 1043 case ch <- false: 1044 case <-d.cancelCh: 1045 } 1046 } 1047 // If no headers were retrieved at all, the peer violated its TD promise that it had a 1048 // better chain compared to ours. The only exception is if its promised blocks were 1049 // already imported by other means (e.g.
fetcher): 1050 // 1051 // R <remote peer>, L <local node>: Both at block 10 1052 // R: Mine block 11, and propagate it to L 1053 // L: Queue block 11 for import 1054 // L: Notice that R's head and TD increased compared to ours, start sync 1055 // L: Import of block 11 finishes 1056 // L: Sync begins, and finds common ancestor at 11 1057 // L: Request new headers up from 11 (R's number was higher, it must have something) 1058 // R: Nothing to give 1059 // Disable any rollback and return 1060 rollback = 0 1061 return nil 1062 } 1063 // Otherwise split the chunk of headers into batches and process them 1064 for len(headers) > 0 { 1065 // Terminate if something failed in between processing chunks 1066 select { 1067 case <-d.cancelCh: 1068 rollbackErr = errCanceled 1069 return errCanceled 1070 default: 1071 } 1072 // Select the next chunk of headers to import 1073 limit := maxHeadersProcess 1074 if limit > len(headers) { 1075 limit = len(headers) 1076 } 1077 chunk := headers[:limit] 1078 1079 // In full sync, schedule the headers for associated content retrieval 1080 if mode == FullSync { 1081 // If we've reached the allowed number of pending headers, stall a bit 1082 for d.queue.PendingBlocks() >= maxQueuedHeaders { 1083 select { 1084 case <-d.cancelCh: 1085 rollbackErr = errCanceled 1086 return errCanceled 1087 case <-time.After(time.Second): 1088 } 1089 } 1090 // Otherwise insert the headers for content retrieval 1091 inserts := d.queue.Schedule(chunk) 1092 if len(inserts) != len(chunk) { 1093 rollbackErr = fmt.Errorf("stale headers: len inserts %v len(chunk) %v", len(inserts), len(chunk)) 1094 return fmt.Errorf("%w: stale headers", errBadPeer) 1095 } 1096 } 1097 headers = headers[limit:] 1098 origin += uint64(limit) 1099 } 1100 // Update the highest block number we know if a higher one is found. 1101 d.syncStatsLock.Lock() 1102 if d.syncStatsChainHeight < origin { 1103 d.syncStatsChainHeight = origin - 1 1104 } 1105 d.syncStatsLock.Unlock() 1106 1107 // Signal the content downloaders of the availability of new tasks 1108 for _, ch := range []chan bool{d.bodyWakeCh} { 1109 select { 1110 case ch <- true: 1111 default: 1112 } 1113 } 1114 } 1115 } 1116 } 1117 1118 // processFullSyncContent takes fetch results from the queue and imports them into the chain.
1119 func (d *Downloader) processFullSyncContent(peerHeight uint64) error { 1120 for { 1121 select { 1122 case <-d.cancelCh: 1123 return nil 1124 default: 1125 results := d.queue.Results(true) 1126 if len(results) == 0 { 1127 return nil 1128 } 1129 if err := d.importBlockResults(results); err != nil { 1130 return err 1131 } 1132 // If all the blocks are fetched, we exit the sync process 1133 if d.headNumber == peerHeight { 1134 return errNoFetchesPending 1135 } 1136 } 1137 } 1138 } 1139 1140 func (d *Downloader) importBlockResults(results []*fetchResult) error { 1141 // Check for any early termination requests 1142 if len(results) == 0 { 1143 return nil 1144 } 1145 select { 1146 case <-d.quitCh: 1147 return errCancelContentProcessing 1148 case <-d.cancelCh: 1149 return errCancelContentProcessing 1150 default: 1151 } 1152 // Retrieve a batch of results to import 1153 first, last := results[0].Header, results[len(results)-1].Header 1154 log.Info("Inserting downloaded chain", "items", len(results), 1155 "firstnum", first.Number(), "firsthash", first.Hash(), 1156 "lastnum", last.Number(), "lasthash", last.Hash(), 1157 ) 1158 1159 for _, result := range results { 1160 block := types.NewBlockWithHeader(result.Header).WithBody(result.Transactions, result.Uncles, result.ExtTransactions, result.SubManifest) 1161 if d.core.IsBlockHashABadHash(block.Hash()) { 1162 return errBadBlockFound 1163 } 1164 d.headNumber = block.NumberU64() 1165 d.headEntropy = d.core.TotalLogS(block.Header()) 1166 d.core.WriteBlock(block) 1167 } 1168 return nil 1169 } 1170 1171 // DeliverHeaders injects a new batch of block headers received from a remote 1172 // node into the download schedule. 1173 func (d *Downloader) DeliverHeaders(id string, headers []*types.Header) error { 1174 return d.deliver(d.headerCh, &headerPack{id, headers}, headerInMeter, headerDropMeter) 1175 } 1176 1177 // DeliverBodies injects a new batch of block bodies received from a remote node. 1178 func (d *Downloader) DeliverBodies(id string, transactions [][]*types.Transaction, uncles [][]*types.Header, extTransactions [][]*types.Transaction, manifests []types.BlockManifest) error { 1179 return d.deliver(d.bodyCh, &bodyPack{id, transactions, uncles, extTransactions, manifests}, bodyInMeter, bodyDropMeter) 1180 } 1181 1182 // deliver injects a new batch of data received from a remote node. 1183 func (d *Downloader) deliver(destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { 1184 // Update the delivery metrics for both good and failed deliveries 1185 inMeter.Mark(int64(packet.Items())) 1186 defer func() { 1187 if err != nil { 1188 dropMeter.Mark(int64(packet.Items())) 1189 } 1190 }() 1191 // Deliver or abort if the sync is canceled while queuing 1192 d.cancelLock.RLock() 1193 cancel := d.cancelCh 1194 d.cancelLock.RUnlock() 1195 if cancel == nil { 1196 return errNoSyncActive 1197 } 1198 select { 1199 case destCh <- packet: 1200 return nil 1201 case <-cancel: 1202 return errNoSyncActive 1203 } 1204 } 1205 1206 // DropPeer drops the given peer using the downloader's registered drop callback. func (d *Downloader) DropPeer(peer *eth.Peer) { 1207 d.dropPeer(peer.ID()) 1208 }
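// Illustrative usage (a sketch only): the eth protocol handler is expected to drive the
// downloader roughly as below, assuming it already owns an event mux, a Core implementation,
// a peer-drop callback and a registered peer advertising a head hash and entropy; mux, core,
// dropPeer, peerID, peer, head and entropy are all hypothetical names here:
//
//	d := New(mux, core, dropPeer)
//	if err := d.RegisterPeer(peerID, eth.QUAI1, peer); err != nil {
//		return err
//	}
//	// Kick off a full sync against the advertised head. Benign outcomes such as errBusy
//	// (another cycle already running) are returned unchanged by Synchronise.
//	if err := d.Synchronise(peerID, head, entropy, FullSync); err != nil {
//		log.Warn("Sync failed", "peer", peerID, "err", err)
//	}
//	progress := d.Progress()
//	log.Info("Sync progress", "current", progress.CurrentBlock, "highest", progress.HighestBlock)
//	d.Terminate() // shutting down; the downloader cannot be reused afterwards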