github.com/annchain/OG@v0.0.9/og/downloader/downloader.go 1 // Copyright © 2019 Annchain Authors <EMAIL ADDRESS> 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 package downloader 15 16 import ( 17 "errors" 18 "fmt" 19 "github.com/annchain/OG/arefactor/common/goroutine" 20 types2 "github.com/annchain/OG/arefactor/og/types" 21 "github.com/annchain/OG/metrics" 22 "github.com/annchain/OG/og/protocol/dagmessage" 23 "github.com/annchain/OG/og/types" 24 "github.com/sirupsen/logrus" 25 "sync" 26 "sync/atomic" 27 "time" 28 ) 29 30 var ( 31 MaxHashFetch = 512 // Amount of hashes to be fetched per retrieval request 32 MaxBlockFetch = 128 // Amount of blocks to be fetched per retrieval request 33 MaxHeaderFetch = 192 // Amount of block headers to be fetched per retrieval request 34 MaxSkeletonSize = 128 // Number of header fetches needed for a skeleton assembly 35 MaxBodyFetch = 128 // Amount of block bodies to be fetched per retrieval request 36 MaxReceiptFetch = 256 // Amount of transaction receipts to allow fetching per request 37 MaxStateFetch = 384 // Amount of node state values to allow fetching per request 38 39 rttMinEstimate = 2 * time.Second // Minimum round-trip time to target for download requests 40 rttMaxEstimate = 20 * time.Second // Maximum round-trip time to target for download requests 41 rttMinConfidence = 0.1 // Worst confidence factor in our estimated RTT value 42 ttlScaling = 3 // Constant scaling factor for RTT -> TTL conversion 43 ttlLimit = time.Minute // Maximum TTL allowance to prevent reaching crazy timeouts 44 45 qosTuningPeers = 5 // Number of peers to tune based on (best peers) 46 qosConfidenceCap = 10 // Number of peers above which not to modify RTT confidence 47 qosTuningImpact = 0.25 // Impact that a new tuning target has on the previous value 48 49 maxQueuedHeaders = 32 * 1024 // [og/01] Maximum number of headers to queue for import (DOS protection) 50 maxHeadersProcess = 2048 // Number of header download results to import at once into the chain 51 maxResultsProcess = 2048 // Number of content download results to import at once into the chain 52 53 fsHeaderCheckFrequency = 100 // Verification frequency of the downloaded headers during fast sync 54 fsHeaderSafetyNet = 2048 // Number of headers to discard in case a chain violation is detected 55 fsHeaderForceVerify = 24 // Number of headers to verify before and after the pivot to accept it 56 fsHeaderContCheck = 3 * time.Second // Time interval to check for header continuations during state download 57 fsMinFullBlocks = 64 // Number of blocks to retrieve fully even in fast sync 58 MaxForkAncestry = 3 * uint64(3000) // Maximum chain reorganisation depth tolerated when looking for a common ancestor 59 ) 60 61 var ( 62 errBusy = errors.New("busy") 63 errUnknownPeer = errors.New("peer is unknown or unhealthy") 64 errBadPeer = errors.New("action from bad peer ignored") 65 errStallingPeer = errors.New("peer is stalling") 66 errNoPeers = errors.New("no peers to keep download active") 67 errTimeout = errors.New("timeout") 68 errEmptyHeaderSet = errors.New("empty header
set by peer") 69 errPeersUnavailable = errors.New("no peers available or all tried for download") 70 errInvalidAncestor = errors.New("retrieved ancestor is invalid") 71 errInvalidChain = errors.New("retrieved hash chain is invalid") 72 errInvalidBlock = errors.New("retrieved block is invalid") 73 errInvalidBody = errors.New("retrieved block body is invalid") 74 errInvalidReceipt = errors.New("retrieved receipt is invalid") 75 errCancelBlockFetch = errors.New("block download canceled (requested)") 76 errCancelHeaderFetch = errors.New("block header download canceled (requested)") 77 errCancelBodyFetch = errors.New("block body download canceled (requested)") 78 errCancelReceiptFetch = errors.New("receipt download canceled (requested)") 79 errCancelStateFetch = errors.New("state data download canceled (requested)") 80 errCancelHeaderProcessing = errors.New("header processing canceled (requested)") 81 errCancelContentProcessing = errors.New("content processing canceled (requested)") 82 errNoSyncActive = errors.New("no sync active") 83 errTooOld = errors.New("peer doesn't speak recent enough protocol version (need version >= 01)") 84 errNotAccepet = errors.New("skeleton filling not accepted") 85 ) 86 87 type Downloader struct { 88 mode SyncMode // Synchronisation mode defining the strategy used (per sync cycle) 89 90 queue *queue // Scheduler for selecting the hashes to download 91 peers *peerSet // Set of active peers from which download can proceed 92 93 rttEstimate uint64 // Round trip time to target for download requests 94 rttConfidence uint64 // Confidence in the estimated RTT (unit: millionths to allow atomic ops) 95 96 dag IDag 97 98 insertTxs insertTxsFn 99 // Callbacks 100 dropPeer peerDropFn // Drops a peer for misbehaving 101 102 // Status 103 synchroniseMock func(id string, hash types2.Hash) error // Replacement for synchronise during testing 104 synchronising int32 105 notified int32 106 committed int32 107 108 // Channels 109 headerCh chan dataPack // [og/01] Channel receiving inbound block headers 110 bodyCh chan dataPack // [og/01] Channel receiving inbound block bodies 111 receiptCh chan dataPack // [eth/63] Channel receiving inbound receipts 112 bodyWakeCh chan bool // [og/01] Channel to signal the block body fetcher of new tasks 113 receiptWakeCh chan bool // [eth/63] Channel to signal the receipt fetcher of new tasks 114 headerProcCh chan []*dagmessage.SequencerHeader // [og/01] Channel to feed the header processor new tasks 115 116 // for stateFetcher 117 stateCh chan dataPack // [eth/63] Channel receiving inbound node state data 118 119 // Cancellation and termination 120 cancelPeer string // Identifier of the peer currently being used as the master (cancel on drop) 121 cancelCh chan struct{} // Channel to cancel mid-flight syncs 122 cancelLock sync.RWMutex // Lock to protect the cancel channel and peer in delivers 123 cancelWg sync.WaitGroup // Make sure all fetcher goroutines have exited. 
124 125 quitCh chan struct{} // Quit channel to signal termination 126 quitLock sync.RWMutex // Lock to prevent double closes 127 128 // Testing hooks 129 syncInitHook func(uint64, uint64) // Method to call upon initiating a new sync run 130 bodyFetchHook func([]*dagmessage.SequencerHeader) // Method to call upon starting a block body fetch 131 chainInsertHook func([]*fetchResult) // Method to call upon inserting a chain of blocks (possibly in multiple invocations) 132 } 133 134 type IDag interface { 135 LatestSequencer() *types.Sequencer 136 GetSequencer(hash types2.Hash, id uint64) *types.Sequencer 137 } 138 139 // New creates a new downloader to fetch hashes and blocks from remote peers. 140 func New(mode SyncMode, dag IDag, dropPeer peerDropFn, insertTxs insertTxsFn) *Downloader { 141 142 dl := &Downloader{ 143 mode: mode, 144 queue: newQueue(), 145 peers: newPeerSet(), 146 rttEstimate: uint64(rttMaxEstimate), 147 rttConfidence: uint64(1000000), 148 dag: dag, 149 dropPeer: dropPeer, 150 insertTxs: insertTxs, 151 headerCh: make(chan dataPack, 1), 152 bodyCh: make(chan dataPack, 1), 153 receiptCh: make(chan dataPack, 1), 154 bodyWakeCh: make(chan bool, 1), 155 receiptWakeCh: make(chan bool, 1), 156 headerProcCh: make(chan []*dagmessage.SequencerHeader, 1), 157 quitCh: make(chan struct{}), 158 stateCh: make(chan dataPack), 159 } 160 return dl 161 } 162 163 func (d *Downloader) Start() { 164 goroutine.New(d.qosTuner) 165 } 166 167 // Synchronising returns whether the downloader is currently retrieving blocks. 168 func (d *Downloader) Synchronising() bool { 169 return atomic.LoadInt32(&d.synchronising) > 0 170 } 171 172 // RegisterPeer injects a new download peer into the set of block sources to be 173 // used for fetching hashes and blocks from. 174 func (d *Downloader) RegisterPeer(id string, version int, peer Peer) error { 175 176 log.WithField("id", id).Trace("Registering sync peer") 177 if err := d.peers.Register(newPeerConnection(id, version, peer)); err != nil { 178 log.WithField("id", id).WithError(err).Error("Failed to register sync peer") 179 return err 180 } 181 d.qosReduceConfidence() 182 183 return nil 184 } 185 186 // UnregisterPeer removes a peer from the known list, preventing any action from 187 // the specified peer. An effort is also made to return any pending fetches into 188 // the queue. 189 func (d *Downloader) UnregisterPeer(id string) error { 190 // Unregister the peer from the active peer set and revoke any fetch tasks 191 192 log.WithField("id", id).Trace("Unregistering sync peer") 193 if err := d.peers.Unregister(id); err != nil { 194 log.WithField("id", id).WithError(err).Error("Failed to unregister sync peer") 195 return err 196 } 197 d.queue.Revoke(id) 198 199 // If this peer was the master peer, abort sync immediately 200 d.cancelLock.RLock() 201 master := id == d.cancelPeer 202 d.cancelLock.RUnlock() 203 204 if master { 205 d.cancel() 206 } 207 return nil 208 } 209 210 // Synchronise tries to sync up our local block chain with a remote peer, adding 211 // various sanity checks and wrapping the attempt with log entries.
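// Illustrative usage sketch (not part of the original file; the variables dag,
// dropPeer, insertTxs, peerID, peerConn, announcedHash and announcedSeqId are
// placeholders). It shows the call sequence a protocol handler is expected to
// follow against the API defined in this file: construct the downloader, start
// the QoS tuner, register peers as they connect, then request a sync when a
// peer announces a higher sequencer than ours.
//
//	dl := New(FullSync, dag, dropPeer, insertTxs)
//	dl.Start() // launches the qosTuner loop
//
//	// On a new peer connection:
//	if err := dl.RegisterPeer(peerID, 1, peerConn); err != nil {
//		return err
//	}
//	// On a sequencer announcement above our local height:
//	_ = dl.Synchronise(peerID, announcedHash, announcedSeqId, FullSync)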
212 func (d *Downloader) Synchronise(id string, head types2.Hash, seqId uint64, mode SyncMode) error { 213 err := d.synchronise(id, head, seqId, mode) 214 switch err { 215 case nil: 216 case errBusy: 217 log.WithError(err).Debug("Synchronisation is busy, retrying") 218 case errTimeout, errBadPeer, errStallingPeer, 219 errEmptyHeaderSet, errPeersUnavailable, errTooOld, 220 errInvalidAncestor, errInvalidChain, errNotAccepet: 221 log.WithError(err).WithField("id", id).Warn("Synchronisation failed, dropping peer") 222 if d.dropPeer == nil { 223 // The dropPeer method is nil when `--copydb` is used for a local copy. 224 // Timeouts can occur if e.g. compaction hits at the wrong time, and can be ignored 225 log.WithField("peer", id).Warn("Downloader wants to drop peer, but peerdrop-function is not set") 226 } else { 227 d.dropPeer(id) 228 } 229 default: 230 log.WithError(err).Warn("Synchronisation failed, retrying") 231 } 232 return err 233 } 234 235 // synchronise will select the peer and use it for synchronising. If an empty string is given 236 // it will use the best peer possible and synchronize if its TD is higher than our own. If any of the 237 // checks fail an error will be returned. This method is synchronous 238 func (d *Downloader) synchronise(id string, hash types2.Hash, seqId uint64, mode SyncMode) error { 239 // Mock out the synchronisation if testing 240 if d.synchroniseMock != nil { 241 return d.synchroniseMock(id, hash) 242 } 243 // Make sure only one goroutine is ever allowed past this point at once 244 if !atomic.CompareAndSwapInt32(&d.synchronising, 0, 1) { 245 return errBusy 246 } 247 defer atomic.StoreInt32(&d.synchronising, 0) 248 249 // Post a user notification of the sync (only once per session) 250 if atomic.CompareAndSwapInt32(&d.notified, 0, 1) { 251 log.Info("Block synchronisation started") 252 } 253 // Reset the queue, peer set and wake channels to clean any internal leftover state 254 d.queue.Reset() 255 d.peers.Reset() 256 257 for _, ch := range []chan bool{d.bodyWakeCh} { 258 select { 259 case <-ch: 260 default: 261 } 262 } 263 for _, ch := range []chan dataPack{d.headerCh, d.bodyCh} { 264 for empty := false; !empty; { 265 select { 266 case <-ch: 267 default: 268 empty = true 269 } 270 } 271 } 272 for empty := false; !empty; { 273 select { 274 case <-d.headerProcCh: 275 default: 276 empty = true 277 } 278 } 279 // Create cancel channel for aborting mid-flight and mark the master peer 280 d.cancelLock.Lock() 281 d.cancelCh = make(chan struct{}) 282 d.cancelPeer = id 283 d.cancelLock.Unlock() 284 285 defer d.Cancel() // No matter what, we can't leave the cancel channel open 286 287 // Set the requested sync mode, unless it's forbidden 288 d.mode = mode 289 290 // Retrieve the origin peer and initiate the downloading process 291 p := d.peers.Peer(id) 292 if p == nil { 293 return errUnknownPeer 294 } 295 return d.syncWithPeer(p, hash, seqId) 296 } 297 298 // syncWithPeer starts a block synchronization based on the hash chain from the 299 // specified peer and head hash. 
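// The admission-and-drain pattern used by synchronise above, shown in
// isolation (an illustrative sketch, not part of the original file; the
// function name and parameters are hypothetical). A compare-and-swap on an
// int32 admits only one sync cycle at a time, and leftover deliveries from a
// previous cycle are drained with non-blocking selects before the channel is
// reused, so stale packets cannot be mistaken for fresh responses.
func exampleSingleFlight(flag *int32, stale chan dataPack, run func() error) error {
	if !atomic.CompareAndSwapInt32(flag, 0, 1) {
		return errBusy // another sync cycle is already in flight
	}
	defer atomic.StoreInt32(flag, 0)

	// Drain anything buffered by the previous cycle.
	for empty := false; !empty; {
		select {
		case <-stale:
		default:
			empty = true
		}
	}
	return run()
}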
300 func (d *Downloader) syncWithPeer(p *peerConnection, hash types2.Hash, seqId uint64) (err error) { 301 302 if p.version < 1 { 303 return errTooOld 304 } 305 log.WithField("peer", p.id).WithField("og", p.version).WithField("head", hash).WithField( 306 "seqid", seqId).WithField("mode", d.mode).Debug("Synchronising with the network") 307 defer func(start time.Time) { 308 log.WithField("elapsed", time.Since(start)).Debug("Synchronisation terminated") 309 }(time.Now()) 310 311 // Look up the sync boundaries: the common ancestor and the target block 312 latest, err := d.fetchHeight(p) 313 if err != nil { 314 return err 315 } 316 height := latest.SequencerId() 317 origin, err := d.findAncestor(p, height) 318 if err != nil { 319 return err 320 } 321 322 //ancestor is smaller than our height 323 ourHeight := d.dag.LatestSequencer().Number() 324 if ourHeight > origin { 325 origin = ourHeight 326 } 327 // Ensure our origin point is below any fast sync pivot point 328 pivot := uint64(0) 329 if d.mode == FastSync { 330 if height <= uint64(fsMinFullBlocks) { 331 origin = 0 332 } else { 333 pivot = height - uint64(fsMinFullBlocks) 334 if pivot <= origin { 335 origin = pivot - 1 336 } 337 } 338 } 339 d.committed = 1 340 if d.mode == FastSync && pivot != 0 { 341 d.committed = 0 342 } 343 // Initiate the sync using a concurrent header and content retrieval algorithm 344 d.queue.Prepare(origin+1, d.mode) 345 if d.syncInitHook != nil { 346 d.syncInitHook(origin, height) 347 } 348 349 fetchers := []func() error{ 350 func() error { return d.fetchHeaders(p, origin+1, pivot) }, // Headers are always retrieved 351 func() error { return d.fetchBodies(origin + 1) }, // Bodies are retrieved during normal and fast sync 352 // Receipts are retrieved during fast sync 353 func() error { return d.processHeaders(origin+1, pivot, seqId) }, 354 } 355 if d.mode == FastSync { 356 fetchers = append(fetchers, func() error { return d.processFastSyncContent(latest) }) 357 } else if d.mode == FullSync { 358 fetchers = append(fetchers, d.processFullSyncContent) 359 } 360 return d.spawnSync(fetchers) 361 } 362 363 // spawnSync runs d.process and all given fetcher functions to completion in 364 // separate goroutines, returning the first error that appears. 365 func (d *Downloader) spawnSync(fetchers []func() error) error { 366 errc := make(chan error, len(fetchers)) 367 d.cancelWg.Add(len(fetchers)) 368 for _, fn := range fetchers { 369 fn := fn 370 function := func() { defer d.cancelWg.Done(); errc <- fn() } 371 goroutine.New(function) 372 } 373 // Wait for the first error, then terminate the others. 374 var err error 375 for i := 0; i < len(fetchers); i++ { 376 if i == len(fetchers)-1 { 377 // Close the queue when all fetchers have exited. 378 // This will cause the block processor to end when 379 // it has processed the queue. 380 d.queue.Close() 381 } 382 if err = <-errc; err != nil { 383 break 384 } 385 } 386 d.queue.Close() 387 d.Cancel() 388 return err 389 } 390 391 // cancel aborts all of the operations and resets the queue. However, cancel does 392 // not wait for the running download goroutines to finish. This method should be 393 // used when cancelling the downloads from inside the downloader. 
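// The fan-out shape of spawnSync above, reduced to its essentials (an
// illustrative sketch, not part of the original file; exampleSpawn is a
// hypothetical name and plain go statements are used instead of the goroutine
// helper). Every fetcher runs in its own goroutine, errors funnel into a
// channel buffered to the number of fetchers, and the WaitGroup is what lets
// Cancel wait for all of them to exit. Unlike the real spawnSync, this
// simplified version waits for every fetcher instead of closing the queue and
// cancelling the run as soon as the first error arrives.
func exampleSpawn(fetchers []func() error) error {
	errc := make(chan error, len(fetchers))
	var wg sync.WaitGroup
	wg.Add(len(fetchers))
	for _, fn := range fetchers {
		fn := fn // capture the loop variable for the goroutine
		go func() { defer wg.Done(); errc <- fn() }()
	}
	var err error
	for i := 0; i < len(fetchers); i++ {
		if e := <-errc; e != nil && err == nil {
			err = e // remember the first failure, keep draining the rest
		}
	}
	wg.Wait()
	return err
}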
394 func (d *Downloader) cancel() { 395 // Close the current cancel channel 396 d.cancelLock.Lock() 397 if d.cancelCh != nil { 398 select { 399 case <-d.cancelCh: 400 // Channel was already closed 401 default: 402 close(d.cancelCh) 403 } 404 } 405 d.cancelLock.Unlock() 406 } 407 408 // Cancel aborts all of the operations and waits for all download goroutines to 409 // finish before returning. 410 func (d *Downloader) Cancel() { 411 d.cancel() 412 d.cancelWg.Wait() 413 } 414 415 // Terminate interrupts the downloader, canceling all pending operations. 416 // The downloader cannot be reused after calling Terminate. 417 func (d *Downloader) Terminate() { 418 log.Info("downloader terminate") 419 // Close the termination channel (make sure double close is allowed) 420 d.quitLock.Lock() 421 select { 422 case <-d.quitCh: 423 log.Debug("got d.quitCh") 424 default: 425 log.Debug("close d.quitCh") 426 close(d.quitCh) 427 } 428 d.quitLock.Unlock() 429 430 // Cancel any pending download requests 431 d.Cancel() 432 } 433 434 // fetchHeight retrieves the head header of the remote peer to aid in estimating 435 // the total time a pending synchronisation would take. 436 func (d *Downloader) fetchHeight(p *peerConnection) (*dagmessage.SequencerHeader, error) { 437 log.Debug("Retrieving remote chain height") 438 439 // Request the advertised remote head block and wait for the response 440 head, _ := p.peer.Head() 441 function := func() { p.peer.RequestHeadersByHash(head, 1, 0, false) } 442 goroutine.New(function) 443 444 ttl := d.requestTTL() 445 timeout := time.After(ttl) 446 for { 447 select { 448 case <-d.cancelCh: 449 return nil, errCancelBlockFetch 450 451 case packet := <-d.headerCh: 452 // Discard anything not from the origin peer 453 if packet.PeerId() != p.id { 454 log.WithField("peer", packet.PeerId()).Info("Received headers from incorrect peer") 455 break 456 } 457 // Make sure the peer actually gave something valid 458 headers := packet.(*headerPack).headers 459 if len(headers) != 1 { 460 log.WithField("headers", len(headers)).Info("Multiple headers for single request") 461 return nil, errBadPeer 462 } 463 head := headers[0] 464 log.WithField("number", head.SequencerId()).WithField("hash", head.GetHash()).Debug( 465 "Remote head header identified") 466 return head, nil 467 468 case <-timeout: 469 log.WithField("peer ", p.id).Info("Waiting for head header timed out", "elapsed", ttl) 470 return nil, errTimeout 471 472 case <-d.bodyCh: 473 case <-d.receiptCh: 474 // Out of bounds delivery, ignore 475 } 476 } 477 } 478 479 // findAncestor tries to locate the common ancestor link of the local chain and 480 // a remote peers blockchain. In the general case when our node was in sync and 481 // on the correct chain, checking the top N links should already get us a match. 482 // In the rare scenario when we ended up on a long reorganisation (i.e. none of 483 // the head links match), we do a binary search to find the common ancestor. 
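// Worked example of the fixed-span probe findAncestor sends before falling
// back to binary search (an illustrative sketch, not part of the original
// file; the function name and the example numbers are assumptions). With the
// default MaxHeaderFetch = 192, a local height of 1000 and a remote height of
// 1200: head = 1000, from = 808 and count = 13, so headers 808, 824, ..., 1000
// are requested (skip = 15, i.e. every 16th), capped at limit = 2*192/16 = 24.
func exampleAncestorProbe(localHeight, remoteHeight uint64) (from int64, count int) {
	head := localHeight
	if head > remoteHeight {
		head = remoteHeight
	}
	from = int64(head) - int64(MaxHeaderFetch)
	if from < 0 {
		from = 0
	}
	limit := 2 * MaxHeaderFetch / 16
	count = 1 + int((int64(localHeight)-from)/16)
	if count > limit {
		count = limit
	}
	return from, count
}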
484 func (d *Downloader) findAncestor(p *peerConnection, height uint64) (uint64, error) { 485 // Figure out the valid ancestor range to prevent rewrite attacks 486 floor, ceil := int64(-1), d.dag.LatestSequencer().Number() 487 if d.mode == FullSync { 488 ceil = d.dag.LatestSequencer().Number() 489 } else if d.mode == FastSync { 490 ceil = d.dag.LatestSequencer().Number() 491 } 492 if ceil >= MaxForkAncestry { 493 floor = int64(ceil - MaxForkAncestry) 494 } 495 log.WithField("local", ceil).WithField("remote", height).Debug("Looking for common ancestor") 496 497 // Request the topmost blocks to short circuit binary ancestor lookup 498 head := ceil 499 if head > height { 500 head = height 501 } 502 from := int64(head) - int64(MaxHeaderFetch) 503 if from < 0 { 504 from = 0 505 } 506 // Span out with 15 block gaps into the future to catch bad head reports 507 limit := 2 * MaxHeaderFetch / 16 508 count := 1 + int((int64(ceil)-from)/16) 509 if count > limit { 510 count = limit 511 } 512 function := func() { p.peer.RequestHeadersByNumber(uint64(from), count, 15, false) } 513 goroutine.New(function) 514 515 // Wait for the remote response to the head fetch 516 number, hash := uint64(0), types2.Hash{} 517 518 ttl := d.requestTTL() 519 timeout := time.After(ttl) 520 521 for finished := false; !finished; { 522 select { 523 case <-d.cancelCh: 524 return 0, errCancelHeaderFetch 525 526 case packet := <-d.headerCh: 527 // Discard anything not from the origin peer 528 if packet.PeerId() != p.id { 529 log.WithField("peer", packet.PeerId()).Debug("Received headers from incorrect peer") 530 break 531 } 532 // Make sure the peer actually gave something valid 533 headers := packet.(*headerPack).headers 534 if len(headers) == 0 { 535 log.Warn("Empty head header set") 536 return 0, errEmptyHeaderSet 537 } 538 // Make sure the peer's reply conforms to the request 539 for i := 0; i < len(headers); i++ { 540 if number := headers[i].SequencerId(); int64(number) != from+int64(i)*16 { 541 log.WithField("index", i).WithField("requested", from+int64(i)*16).WithField("received", number).Warn( 542 "Head headers broke chain ordering") 543 return 0, errInvalidChain 544 } 545 } 546 // Check if a common ancestor was found 547 finished = true 548 for i := len(headers) - 1; i >= 0; i-- { 549 // Skip any headers that underflow/overflow our requested set 550 if headers[i].SequencerId() < uint64(from) || headers[i].SequencerId() > ceil { 551 continue 552 } 553 // Otherwise check if we already know the header or not 554 if d.mode == FullSync && d.dag.GetSequencer(headers[i].GetHash(), headers[i].SequencerId()) != nil { 555 number, hash = headers[i].SequencerId(), headers[i].GetHash() 556 557 // If every header is known, even future ones, the peer straight out lied about its head 558 if number > height && i == limit-1 { 559 log.WithField("reported", height).WithField("found", number).Warn("Lied about chain head") 560 return 0, errStallingPeer 561 } 562 break 563 } 564 } 565 566 case <-timeout: 567 log.WithField("peer ", p.id).Debug("Waiting for head header timed out", "elapsed", ttl) 568 return 0, errTimeout 569 570 case <-d.bodyCh: 571 case <-d.receiptCh: 572 // Out of bounds delivery, ignore 573 } 574 } 575 // If the head fetch already found an ancestor, return 576 if !hash.Empty() { 577 if int64(number) <= floor { 578 log.WithField("number", number).WithField("hash", hash).WithField( 579 "allowance", floor).Warn("Ancestor below allowance") 580 return 0, errInvalidAncestor 581 } 582 log.WithField("number", 
number).WithField("hash", hash.String()).Debug("Found common ancestor") 583 return number, nil 584 } 585 // Ancestor not found, we need to binary search over our chain 586 start, end := uint64(0), head 587 if floor > 0 { 588 start = uint64(floor) 589 } 590 for start+1 < end { 591 // Split our chain interval in two, and request the hash to cross check 592 check := (start + end) / 2 593 594 ttl := d.requestTTL() 595 timeout := time.After(ttl) 596 597 function := func() { p.peer.RequestHeadersByNumber(check, 1, 0, false) } 598 goroutine.New(function) 599 600 // Wait until a reply arrives to this request 601 for arrived := false; !arrived; { 602 select { 603 case <-d.cancelCh: 604 return 0, errCancelHeaderFetch 605 606 case packer := <-d.headerCh: 607 // Discard anything not from the origin peer 608 if packer.PeerId() != p.id { 609 log.WithField("peer", packer.PeerId()).Info("Received headers from incorrect peer") 610 break 611 } 612 // Make sure the peer actually gave something valid 613 headers := packer.(*headerPack).headers 614 if len(headers) != 1 { 615 log.WithField("headers", len(headers)).Info("Multiple headers for single request") 616 return 0, errBadPeer 617 } 618 arrived = true 619 620 // Modify the search interval based on the response 621 if d.mode == FullSync && d.dag.GetSequencer(headers[0].GetHash(), headers[0].SequencerId()) == nil { 622 end = check 623 break 624 } 625 626 start = check 627 628 case <-timeout: 629 log.WithField("peer ", p.id).WithField("elapsed", ttl).Info("Waiting for search header timed out") 630 return 0, errTimeout 631 632 case <-d.bodyCh: 633 case <-d.receiptCh: 634 // Out of bounds delivery, ignore 635 } 636 } 637 } 638 // Ensure valid ancestry and return 639 if int64(start) <= floor { 640 log.WithField("number", start).WithField("hash", hash).WithField( 641 "allowance", floor).Warn("Ancestor below allowance") 642 return 0, errInvalidAncestor 643 } 644 log.WithField("number", start).WithField("hash", hash).Debug("Found common ancestor") 645 return start, nil 646 } 647 648 // fetchHeaders keeps retrieving headers concurrently from the number 649 // requested, until no more are returned, potentially throttling on the way. To 650 // facilitate concurrency but still protect against malicious nodes sending bad 651 // headers, we construct a header chain skeleton using the "origin" peer we are 652 // syncing with, and fill in the missing headers using anyone else. Headers from 653 // other peers are only accepted if they map cleanly to the skeleton. If no one 654 // can fill in the skeleton - not even the origin peer - it's assumed invalid and 655 // the origin is dropped. 
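// The binary search findAncestor above falls back to, reduced to its core (an
// illustrative sketch, not part of the original file; known is a hypothetical
// predicate standing in for the GetSequencer lookup against the headers the
// remote returns). The loop keeps the invariant "start is on our chain, end is
// not" and converges on the highest height both chains share.
func exampleBinarySearchAncestor(start, end uint64, known func(uint64) bool) uint64 {
	for start+1 < end {
		check := (start + end) / 2
		if known(check) {
			start = check
		} else {
			end = check
		}
	}
	return start
}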
656 func (d *Downloader) fetchHeaders(p *peerConnection, from uint64, pivot uint64) error { 657 log.WithField("origin", from).Debug("Directing header downloads") 658 defer log.Debug("Header download terminated") 659 660 // Create a timeout timer, and the associated header fetcher 661 skeleton := true // Skeleton assembly phase or finishing up 662 request := time.Now() // time of the last skeleton fetch request 663 timeout := time.NewTimer(0) // timer to dump a non-responsive active peer 664 <-timeout.C // timeout channel should be initially empty 665 defer timeout.Stop() 666 667 var ttl time.Duration 668 669 //skeleton = false 670 getHeaders := func(from uint64) { 671 request = time.Now() 672 673 ttl = d.requestTTL() 674 timeout.Reset(ttl) 675 676 if skeleton { 677 log.WithField("count", MaxHeaderFetch).WithField("from", from).Trace("Fetching skeleton headers") 678 function := func() { 679 p.peer.RequestHeadersByNumber(from+uint64(MaxHeaderFetch)-1, 680 MaxSkeletonSize, MaxHeaderFetch-1, false) 681 } 682 goroutine.New(function) 683 } else { 684 log.WithField("count", MaxHeaderFetch).WithField("from", from).Trace("Fetching full headers") 685 function := func() { 686 p.peer.RequestHeadersByNumber(from, MaxHeaderFetch, 0, false) 687 688 } 689 goroutine.New(function) 690 } 691 692 } 693 // Start pulling the header chain skeleton until all is done 694 getHeaders(from) 695 696 for { 697 select { 698 case <-d.cancelCh: 699 return errCancelHeaderFetch 700 701 case packet := <-d.headerCh: 702 // Make sure the active peer is giving us the skeleton headers 703 if packet.PeerId() != p.id { 704 log.WithField("peer", packet.PeerId()).Debug("Received skeleton from incorrect peer") 705 break 706 } 707 headerReqTimer.UpdateSince(request) 708 timeout.Stop() 709 710 // If the skeleton's finished, pull any remaining head headers directly from the origin 711 if packet.Items() == 0 && skeleton { 712 skeleton = false 713 getHeaders(from) 714 continue 715 } 716 // If no more headers are inbound, notify the content fetchers and return 717 if packet.Items() == 0 { 718 // Don't abort header fetches while the pivot is downloading 719 if atomic.LoadInt32(&d.committed) == 0 && pivot <= from { 720 log.Debug("No headers, waiting for pivot commit") 721 select { 722 case <-time.After(fsHeaderContCheck): 723 getHeaders(from) 724 continue 725 case <-d.cancelCh: 726 return errCancelHeaderFetch 727 } 728 } 729 // Pivot done (or not in fast sync) and no more headers, terminate the process 730 log.Debug("No more headers available") 731 select { 732 case d.headerProcCh <- nil: 733 return nil 734 case <-d.cancelCh: 735 return errCancelHeaderFetch 736 } 737 } 738 headers := packet.(*headerPack).headers 739 740 // If we received a skeleton batch, resolve internals concurrently 741 if skeleton { 742 filled, proced, err := d.fillHeaderSkeleton(from, headers) 743 if err != nil { 744 log.WithError(err).Warn("Skeleton chain invalid") 745 return errInvalidChain 746 } 747 headers = filled[proced:] 748 from += uint64(proced) 749 } 750 // Insert all the new headers and fetch the next batch 751 if len(headers) > 0 { 752 log.WithField("from", from).WithField("count", len(headers)).Trace("Scheduling new headers") 753 select { 754 case d.headerProcCh <- headers: 755 case <-d.cancelCh: 756 return errCancelHeaderFetch 757 } 758 from += uint64(len(headers)) 759 } 760 getHeaders(from) 761 762 case <-timeout.C: 763 764 if d.dropPeer == nil { 765 // The dropPeer method is nil when `--copydb` is used for a local copy. 766 // Timeouts can occur if e.g. 
compaction hits at the wrong time, and can be ignored 767 log.WithField("peer", p.id).Warn("Downloader wants to drop peer, but peerdrop-function is not set") 768 break 769 } 770 // Header retrieval timed out, consider the peer bad and drop 771 log.WithField("peer ", p.id).WithField("elapsed", ttl).Debug("Header request timed out") 772 headerTimeoutMeter.Mark(1) 773 d.dropPeer(p.id) 774 775 // Finish the sync gracefully instead of dumping the gathered data though 776 for _, ch := range []chan bool{d.bodyWakeCh, d.receiptWakeCh} { 777 select { 778 case ch <- false: 779 case <-d.cancelCh: 780 } 781 } 782 select { 783 case d.headerProcCh <- nil: 784 case <-d.cancelCh: 785 } 786 return errBadPeer 787 } 788 } 789 } 790 791 // fillHeaderSkeleton concurrently retrieves headers from all our available peers 792 // and maps them to the provided skeleton header chain. 793 // 794 // Any partial results from the beginning of the skeleton is (if possible) forwarded 795 // immediately to the header processor to keep the rest of the pipeline full even 796 // in the case of header stalls. 797 // 798 // The method returns the entire filled skeleton and also the number of headers 799 // already forwarded for processing. 800 func (d *Downloader) fillHeaderSkeleton(from uint64, skeleton []*dagmessage.SequencerHeader) ([]*dagmessage.SequencerHeader, int, error) { 801 log.WithField("from", from).Debug("Filling up skeleton") 802 d.queue.ScheduleSkeleton(from, skeleton) 803 804 var ( 805 deliver = func(packet dataPack) (int, error) { 806 pack := packet.(*headerPack) 807 return d.queue.DeliverHeaders(pack.peerID, pack.headers, d.headerProcCh) 808 } 809 expire = func() map[string]int { return d.queue.ExpireHeaders(d.requestTTL()) } 810 throttle = func() bool { return false } 811 reserve = func(p *peerConnection, count int) (*fetchRequest, bool, error) { 812 return d.queue.ReserveHeaders(p, count), false, nil 813 } 814 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchHeaders(req.From, MaxHeaderFetch) } 815 capacity = func(p *peerConnection) int { return p.HeaderCapacity(d.requestRTT()) } 816 setIdle = func(p *peerConnection, accepted int) { p.SetHeadersIdle(accepted) } 817 ) 818 err := d.fetchParts(errCancelHeaderFetch, d.headerCh, deliver, d.queue.headerContCh, expire, 819 d.queue.PendingHeaders, d.queue.InFlightHeaders, throttle, reserve, 820 nil, fetch, d.queue.CancelHeaders, capacity, d.peers.HeaderIdlePeers, setIdle, "headers") 821 if err != nil { 822 log.WithError(err).Warn("Skeleton fill terminated") 823 } else { 824 log.Debug("Skeleton fill terminated") 825 } 826 827 filled, proced := d.queue.RetrieveHeaders() 828 return filled, proced, err 829 } 830 831 // fetchBodies iteratively downloads the scheduled block bodies, taking any 832 // available peers, reserving a chunk of blocks for each, waiting for delivery 833 // and also periodically checking for timeouts. 
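// How a skeleton request maps onto absolute sequencer numbers (an illustrative
// sketch, not part of the original file; the function name is hypothetical).
// fetchHeaders above requests the skeleton with
// RequestHeadersByNumber(from+MaxHeaderFetch-1, MaxSkeletonSize, MaxHeaderFetch-1, false),
// so the i-th skeleton header sits at from + (i+1)*MaxHeaderFetch - 1; with the
// defaults 192/128 one skeleton spans 128*192 = 24576 headers, and
// fillHeaderSkeleton then has idle peers fill in the gaps between the anchors.
func exampleSkeletonAnchors(from uint64) []uint64 {
	anchors := make([]uint64, 0, MaxSkeletonSize)
	for i := 0; i < MaxSkeletonSize; i++ {
		anchors = append(anchors, from+uint64(i+1)*uint64(MaxHeaderFetch)-1)
	}
	return anchors
}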
834 func (d *Downloader) fetchBodies(from uint64) error { 835 log.WithField("origin", from).Debug("Downloading block bodies") 836 837 var ( 838 deliver = func(packet dataPack) (int, error) { 839 840 pack := packet.(*bodyPack) 841 return d.queue.DeliverBodies(pack.peerID, pack.transactions, pack.sequencers) 842 } 843 expire = func() map[string]int { return d.queue.ExpireBodies(d.requestTTL()) } 844 fetch = func(p *peerConnection, req *fetchRequest) error { return p.FetchBodies(req) } 845 capacity = func(p *peerConnection) int { return p.BlockCapacity(d.requestRTT()) } 846 setIdle = func(p *peerConnection, accepted int) { p.SetBodiesIdle(accepted) } 847 ) 848 err := d.fetchParts(errCancelBodyFetch, d.bodyCh, deliver, d.bodyWakeCh, expire, 849 d.queue.PendingBlocks, d.queue.InFlightBlocks, d.queue.ShouldThrottleBlocks, d.queue.ReserveBodies, 850 d.bodyFetchHook, fetch, d.queue.CancelBodies, capacity, d.peers.BodyIdlePeers, setIdle, "bodies") 851 852 if err != nil { 853 log.WithError(err).Warn("Block body download terminated") 854 } else { 855 log.Debug("Block body download terminated") 856 } 857 return err 858 } 859 860 // fetchParts iteratively downloads scheduled block parts, taking any available 861 // peers, reserving a chunk of fetch requests for each, waiting for delivery and 862 // also periodically checking for timeouts. 863 // 864 // As the scheduling/timeout logic mostly is the same for all downloaded data 865 // types, this method is used by each for data gathering and is instrumented with 866 // various callbacks to handle the slight differences between processing them. 867 // 868 // The instrumentation parameters: 869 // - errCancel: error type to return if the fetch operation is cancelled (mostly makes logging nicer) 870 // - deliveryCh: channel from which to retrieve downloaded data packets (merged from all concurrent peers) 871 // - deliver: processing callback to deliver data packets into type specific download queues (usually within `queue`) 872 // - wakeCh: notification channel for waking the fetcher when new tasks are available (or sync completed) 873 // - expire: task callback method to abort requests that took too long and return the faulty peers (traffic shaping) 874 // - pending: task callback for the number of requests still needing download (detect completion/non-completability) 875 // - inFlight: task callback for the number of in-progress requests (wait for all active downloads to finish) 876 // - throttle: task callback to check if the processing queue is full and activate throttling (bound memory use) 877 // - reserve: task callback to reserve new download tasks to a particular peer (also signals partial completions) 878 // - fetchHook: tester callback to notify of new tasks being initiated (allows testing the scheduling logic) 879 // - fetch: network callback to actually send a particular download request to a physical remote peer 880 // - cancel: task callback to abort an in-flight download request and allow rescheduling it (in case of lost peer) 881 // - capacity: network callback to retrieve the estimated type-specific bandwidth capacity of a peer (traffic shaping) 882 // - idle: network callback to retrieve the currently (type specific) idle peers that can be assigned tasks 883 // - setIdle: network callback to set a peer back to idle and update its estimated capacity (traffic shaping) 884 // - kind: textual label of the type being downloaded to display in log mesages 885 func (d *Downloader) fetchParts(errCancel error, deliveryCh chan dataPack, deliver 
func(dataPack) (int, error), wakeCh chan bool, 886 expire func() map[string]int, pending func() int, inFlight func() bool, throttle func() bool, reserve func(*peerConnection, int) (*fetchRequest, bool, error), 887 fetchHook func([]*dagmessage.SequencerHeader), fetch func(*peerConnection, *fetchRequest) error, cancel func(*fetchRequest), capacity func(*peerConnection) int, 888 idle func() ([]*peerConnection, int), setIdle func(*peerConnection, int), kind string) error { 889 890 // Create a ticker to detect expired retrieval tasks 891 ticker := time.NewTicker(100 * time.Millisecond) 892 defer ticker.Stop() 893 894 update := make(chan struct{}, 1) 895 896 // Prepare the queue and fetch block parts until the block header fetcher's done 897 finished := false 898 for { 899 select { 900 case <-d.cancelCh: 901 return errCancel 902 903 case packet := <-deliveryCh: 904 // If the peer was previously banned and failed to deliver its pack 905 // in a reasonable time frame, ignore its message. 906 if peer := d.peers.Peer(packet.PeerId()); peer != nil { 907 // Deliver the received chunk of data and check chain validity 908 accepted, err := deliver(packet) 909 if err == errInvalidChain { 910 return err 911 } 912 // Unless a peer delivered something completely else than requested (usually 913 // caused by a timed out request which came through in the end), set it to 914 // idle. If the delivery's stale, the peer should have already been idled. 915 if err != errStaleDelivery { 916 setIdle(peer, accepted) 917 } 918 // Issue a log to the user to see what's going on 919 switch { 920 case err == nil && packet.Items() == 0: 921 log.WithField("type", kind).Warn("Requested data not delivered") 922 case err == nil: 923 log.WithField("type", kind).WithField("count", packet.Stats()).Trace("Delivered new batch of data") 924 default: 925 log.WithField("type", kind).WithError(err).Warn("Failed to deliver retrieved data") 926 } 927 } 928 // Blocks assembled, try to update the progress 929 select { 930 case update <- struct{}{}: 931 default: 932 } 933 934 case cont := <-wakeCh: 935 // The header fetcher sent a continuation flag, check if it's done 936 if !cont { 937 finished = true 938 } 939 // Headers arrive, try to update the progress 940 select { 941 case update <- struct{}{}: 942 default: 943 } 944 945 case <-ticker.C: 946 // Sanity check update the progress 947 select { 948 case update <- struct{}{}: 949 default: 950 } 951 952 case <-update: 953 // Short circuit if we lost all our peers 954 if d.peers.Len() == 0 { 955 return errNoPeers 956 } 957 // Check for fetch request timeouts and demote the responsible peers 958 for pid, fails := range expire() { 959 if peer := d.peers.Peer(pid); peer != nil { 960 // If a lot of retrieval elements expired, we might have overestimated the remote peer or perhaps 961 // ourselves. Only reset to minimal throughput but don't drop just yet. If even the minimal times 962 // out that sync wise we need to get rid of the peer. 963 // 964 // The reason the minimum threshold is 2 is because the downloader tries to estimate the bandwidth 965 // and latency of a peer separately, which requires pushing the measures capacity a bit and seeing 966 // how response times reacts, to it always requests one more than the minimum (i.e. min 2). 
967 if fails > 2 { 968 log.WithField("type", kind).Debug("Data delivery timed out") 969 setIdle(peer, 0) 970 } else { 971 log.WithField("type", kind).Debug("Stalling delivery, dropping") 972 if d.dropPeer == nil { 973 // The dropPeer method is nil when `--copydb` is used for a local copy. 974 // Timeouts can occur if e.g. compaction hits at the wrong time, and can be ignored 975 log.WithField("peer", pid).Warn("Downloader wants to drop peer, but peerdrop-function is not set") 976 } else { 977 d.dropPeer(pid) 978 } 979 } 980 } 981 } 982 // If there's nothing more to fetch, wait or terminate 983 if pending() == 0 { 984 if !inFlight() && finished { 985 log.WithField("type", kind).Debug("data fetching completed") 986 return nil 987 } 988 break 989 } 990 // Send a download request to all idle peers, until throttled 991 progressed, throttled, running := false, false, inFlight() 992 idles, total := idle() 993 994 for _, peer := range idles { 995 // Short circuit if throttling activated 996 if throttle() { 997 throttled = true 998 break 999 } 1000 // Short circuit if there is no more available task. 1001 if pending() == 0 { 1002 break 1003 } 1004 _, id := peer.peer.Head() 1005 if id < d.dag.LatestSequencer().Number() { 1006 log.WithField("peer head ", id).WithField("peer", peer.id).Debug("peer head is behind") 1007 continue 1008 } 1009 // Reserve a chunk of fetches for a peer. A nil can mean either that 1010 // no more headers are available, or that the peer is known not to 1011 // have them. 1012 request, progress, err := reserve(peer, capacity(peer)) 1013 if err != nil { 1014 return err 1015 } 1016 if progress { 1017 progressed = true 1018 } 1019 if request == nil { 1020 continue 1021 } 1022 if request.From > 0 { 1023 log.WithField("type", kind).WithField("from", request.From).Trace( 1024 "Requesting new batch of data") 1025 } else { 1026 log.WithField("from", request.Headers[0].SequencerId()).WithField( 1027 "type", kind).WithField("count", len(request.Headers)).Trace( 1028 "Requesting new batch of data") 1029 } 1030 // Fetch the chunk and make sure any errors return the hashes to the queue 1031 if fetchHook != nil { 1032 fetchHook(request.Headers) 1033 } 1034 if err := fetch(peer, request); err != nil { 1035 // Although we could try and make an attempt to fix this, this error really 1036 // means that we've double allocated a fetch task to a peer. If that is the 1037 // case, the internal state of the downloader and the queue is very wrong so 1038 // better hard crash and note the error instead of silently accumulating into 1039 // a much bigger issue. 1040 panic(fmt.Sprintf("%v: %s fetch assignment failed", peer, kind)) 1041 } 1042 running = true 1043 } 1044 // Make sure that we have peers available for fetching. If all peers have been tried 1045 // and all failed throw an error 1046 if !progressed && !throttled && !running && len(idles) == total && pending() > 0 { 1047 log.WithField("progressed", progressed).WithField("throttled", throttled).WithField( 1048 "running", running).WithField("len idesls", len(idles)).WithField("total", total).WithField("pending", pending()).Debug("fetch error") 1049 return errPeersUnavailable 1050 } 1051 } 1052 } 1053 } 1054 1055 // processHeaders takes batches of retrieved headers from an input channel and 1056 // keeps processing and scheduling them into the header chain and downloader's 1057 // queue until the stream ends or a failure occurs. 
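// The dispatch step at the heart of fetchParts above, in isolation (an
// illustrative sketch, not part of the original file; the signature is
// hypothetical and, unlike the real loop, it returns the send error instead of
// panicking on a double-allocated fetch). Idle peers are handed reserved work
// until the queue throttles, the pending set empties, or every idle peer has
// been tried.
func exampleDispatch(idles []*peerConnection, pending func() int, throttle func() bool,
	reserve func(*peerConnection) *fetchRequest, send func(*peerConnection, *fetchRequest) error) (dispatched bool, err error) {
	for _, peer := range idles {
		if throttle() || pending() == 0 {
			return dispatched, nil
		}
		request := reserve(peer)
		if request == nil {
			continue // nothing this peer can usefully serve right now
		}
		if err := send(peer, request); err != nil {
			return dispatched, err
		}
		dispatched = true
	}
	return dispatched, nil
}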
1058 func (d *Downloader) processHeaders(origin uint64, pivot uint64, seqId uint64) error { 1059 // Keep a count of uncertain headers to roll back 1060 // Wait for batches of headers to process 1061 gotHeaders := false 1062 for { 1063 select { 1064 case <-d.cancelCh: 1065 return errCancelHeaderProcessing 1066 1067 case headers := <-d.headerProcCh: 1068 // Terminate header processing if we synced up 1069 if len(headers) == 0 { 1070 // Notify everyone that headers are fully processed 1071 for _, ch := range []chan bool{d.bodyWakeCh} { 1072 select { 1073 case ch <- false: 1074 case <-d.cancelCh: 1075 } 1076 } 1077 1078 // If no headers were retrieved at all, the peer violated its TD promise that it had a 1079 // better chain compared to ours. The only exception is if its promised blocks were 1080 // already imported by other means (e.g. fetcher): 1081 // 1082 // R <remote peer>, L <local node>: Both at block 10 1083 // R: Mine block 11, and propagate it to L 1084 // L: Queue block 11 for import 1085 // L: Notice that R's head and TD increased compared to ours, start sync 1086 // L: Import of block 11 finishes 1087 // L: Sync begins, and finds common ancestor at 11 1088 // L: Request new headers up from 11 (R's TD was higher, it must have something) 1089 // R: Nothing to give 1090 if d.mode != LightSync { 1091 head := d.dag.LatestSequencer() 1092 if !gotHeaders && head.Number() > seqId { 1093 return errStallingPeer 1094 } 1095 } 1096 // Disable any rollback and return 1097 return nil 1098 } 1099 // Otherwise split the chunk of headers into batches and process them 1100 gotHeaders = true 1101 1102 for len(headers) > 0 { 1103 // Terminate if something failed in between processing chunks 1104 select { 1105 case <-d.cancelCh: 1106 return errCancelHeaderProcessing 1107 default: 1108 } 1109 // Select the next chunk of headers to import 1110 limit := maxHeadersProcess 1111 if limit > len(headers) { 1112 limit = len(headers) 1113 } 1114 chunk := headers[:limit] 1115 1116 // Unless we're doing light chains, schedule the headers for associated content retrieval 1117 if d.mode == FullSync || d.mode == FastSync { 1118 // If we've reached the allowed number of pending headers, stall a bit 1119 for d.queue.PendingBlocks() >= maxQueuedHeaders { 1120 select { 1121 case <-d.cancelCh: 1122 return errCancelHeaderProcessing 1123 case <-time.After(time.Second): 1124 } 1125 } 1126 // Otherwise insert the headers for content retrieval 1127 inserts := d.queue.Schedule(chunk, origin) 1128 if len(inserts) != len(chunk) { 1129 log.Debug("Stale headers") 1130 return errBadPeer 1131 } 1132 } 1133 headers = headers[limit:] 1134 origin += uint64(limit) 1135 } 1136 1137 // Signal the content downloaders of the availablility of new tasks 1138 for _, ch := range []chan bool{d.bodyWakeCh} { 1139 select { 1140 case ch <- true: 1141 default: 1142 } 1143 } 1144 } 1145 } 1146 } 1147 1148 // processFullSyncContent takes fetch results from the queue and imports them into the chain. 
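// The batching pattern processHeaders above uses to feed the queue (an
// illustrative sketch, not part of the original file; schedule stands in for
// d.queue.Schedule and the stall/cancel handling is omitted). Headers are
// consumed in chunks of at most maxHeadersProcess, advancing the origin as
// each chunk is scheduled.
func exampleChunkHeaders(headers []*dagmessage.SequencerHeader, origin uint64,
	schedule func([]*dagmessage.SequencerHeader, uint64)) uint64 {
	for len(headers) > 0 {
		limit := maxHeadersProcess
		if limit > len(headers) {
			limit = len(headers)
		}
		schedule(headers[:limit], origin)
		headers = headers[limit:]
		origin += uint64(limit)
	}
	return origin
}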
1149 func (d *Downloader) processFullSyncContent() error { 1150 for { 1151 results := d.queue.Results(true) 1152 if len(results) == 0 { 1153 return nil 1154 } 1155 if d.chainInsertHook != nil { 1156 d.chainInsertHook(results) 1157 } 1158 if err := d.importBlockResults(results); err != nil { 1159 return err 1160 } 1161 } 1162 } 1163 1164 func (d *Downloader) importBlockResults(results []*fetchResult) error { 1165 // Check for any early termination requests 1166 if len(results) == 0 { 1167 return nil 1168 } 1169 select { 1170 case <-d.quitCh: 1171 log.Debug("got d.quitch") 1172 return errCancelContentProcessing 1173 default: 1174 } 1175 // Retrieve the a batch of results to import 1176 first, last := results[0].Header, results[len(results)-1].Header 1177 1178 log.WithField("items", len(results)).WithField("firstnum", first.SequencerId()).WithField( 1179 "firsthash", first.GetHash()).WithField("lastnum", last.SequencerId()).WithField("lasthash", last.GetHash()).Debug( 1180 "Inserting downloaded txs") 1181 1182 for _, result := range results { 1183 log.WithField("len txs", len(result.Transactions)).WithField("seq", result.Sequencer).Debug("Inserting downloaded txs") 1184 err := d.insertTxs(result.Sequencer, result.Transactions) 1185 if err != nil { 1186 return err 1187 } 1188 1189 } 1190 1191 return nil 1192 } 1193 1194 // processFastSyncContent takes fetch results from the queue and writes them to the 1195 // database. It also controls the synchronisation of state nodes of the pivot block. 1196 func (d *Downloader) processFastSyncContent(latest *dagmessage.SequencerHeader) error { 1197 1198 // Figure out the ideal pivot block. Note, that this goalpost may move if the 1199 // sync takes long enough for the chain head to move significantly. 1200 pivot := uint64(0) 1201 if height := latest.SequencerId(); height > uint64(fsMinFullBlocks) { 1202 pivot = height - uint64(fsMinFullBlocks) 1203 } 1204 // To cater for moving pivot points, track the pivot block and subsequently 1205 // accumulated download results separately. 1206 var ( 1207 oldPivot *fetchResult // Locked in pivot block, might change eventually 1208 oldTail []*fetchResult // Downloaded content after the pivot 1209 ) 1210 for { 1211 // Wait for the next batch of downloaded data to be available, and if the pivot 1212 // block became stale, move the goalpost 1213 results := d.queue.Results(oldPivot == nil) // Block if we're not monitoring pivot staleness 1214 if len(results) == 0 { 1215 // If pivot sync is done, stop 1216 if oldPivot == nil { 1217 return nil 1218 } 1219 // If sync failed, stop 1220 select { 1221 case <-d.cancelCh: 1222 return nil 1223 default: 1224 } 1225 } 1226 if d.chainInsertHook != nil { 1227 d.chainInsertHook(results) 1228 } 1229 if oldPivot != nil { 1230 results = append(append([]*fetchResult{oldPivot}, oldTail...), results...) 
1231 } 1232 // Split around the pivot block and process the two sides via fast/full sync 1233 if atomic.LoadInt32(&d.committed) == 0 { 1234 latest = results[len(results)-1].Header 1235 if height := latest.SequencerId(); height > pivot+2*uint64(fsMinFullBlocks) { 1236 log.WithField("old", pivot).WithField("new", height-uint64(fsMinFullBlocks)).Warn("Pivot became stale, moving") 1237 pivot = height - uint64(fsMinFullBlocks) 1238 } 1239 } 1240 1241 // Fast sync done, pivot commit done, full import 1242 if err := d.importBlockResults(results); err != nil { 1243 return err 1244 } 1245 } 1246 } 1247 1248 // DeliverHeaders injects a new batch of block headers received from a remote 1249 // node into the download schedule. 1250 func (d *Downloader) DeliverHeaders(id string, headers []*dagmessage.SequencerHeader) (err error) { 1251 return d.deliver(id, d.headerCh, &headerPack{id, headers}, headerInMeter, headerDropMeter) 1252 } 1253 1254 // DeliverBodies injects a new batch of block bodies received from a remote node. 1255 func (d *Downloader) DeliverBodies(id string, transactions []types.Txis, sequencers []*types.Sequencer) (err error) { 1256 return d.deliver(id, d.bodyCh, &bodyPack{id, transactions, sequencers}, bodyInMeter, bodyDropMeter) 1257 } 1258 1259 // DeliverNodeData injects a new batch of node state data received from a remote node. 1260 func (d *Downloader) DeliverNodeData(id string, data [][]byte) (err error) { 1261 return d.deliver(id, d.stateCh, &statePack{id, data}, stateInMeter, stateDropMeter) 1262 } 1263 1264 // deliver injects a new batch of data received from a remote node. 1265 func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { 1266 // Update the delivery metrics for both good and failed deliveries 1267 inMeter.Mark(int64(packet.Items())) 1268 defer func() { 1269 if err != nil { 1270 dropMeter.Mark(int64(packet.Items())) 1271 } 1272 }() 1273 // Deliver or abort if the sync is canceled while queuing 1274 d.cancelLock.RLock() 1275 cancel := d.cancelCh 1276 d.cancelLock.RUnlock() 1277 if cancel == nil { 1278 return errNoSyncActive 1279 } 1280 select { 1281 case destCh <- packet: 1282 return nil 1283 case <-cancel: 1284 return errNoSyncActive 1285 } 1286 } 1287 1288 // qosTuner is the quality of service tuning loop that occasionally gathers the 1289 // peer latency statistics and updates the estimated request round trip time. 1290 func (d *Downloader) qosTuner() { 1291 for { 1292 // Retrieve the current median RTT and integrate it into the previous target RTT 1293 rtt := time.Duration((1-qosTuningImpact)*float64(atomic.LoadUint64(&d.rttEstimate)) + qosTuningImpact*float64(d.peers.medianRTT())) 1294 atomic.StoreUint64(&d.rttEstimate, uint64(rtt)) 1295 1296 // A new RTT cycle passed, increase our confidence in the estimated RTT 1297 conf := atomic.LoadUint64(&d.rttConfidence) 1298 conf = conf + (1000000-conf)/2 1299 atomic.StoreUint64(&d.rttConfidence, conf) 1300 1301 // Log the new QoS values and sleep until the next RTT 1302 log.WithFields(logrus.Fields{ 1303 "rtt": rtt, 1304 "confidence": float64(conf) / 1000000.0, 1305 "ttl": d.requestTTL(), 1306 }).Debug("Recalculated downloader QoS values") 1307 select { 1308 case <-d.quitCh: 1309 log.Debug("got d.quitCh") 1310 return 1311 case <-time.After(rtt): 1312 } 1313 } 1314 } 1315 1316 // qosReduceConfidence is meant to be called when a new peer joins the downloader's 1317 // peer set, needing to reduce the confidence we have in our QoS estimates.
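// The two QoS updates qosTuner above performs each cycle, written as plain
// arithmetic (an illustrative sketch, not part of the original file; here
// confidence is expressed on a 0..1 scale, whereas the real code stores it in
// millionths so it can be updated atomically). The RTT estimate is an
// exponential moving average pulled toward the peers' median RTT with weight
// qosTuningImpact (0.25), so a 20s estimate with a 2s median moves to
// 0.75*20s + 0.25*2s = 15.5s, and the confidence halves its distance to 1.
func exampleQosStep(estimate, median time.Duration, confidence float64) (time.Duration, float64) {
	rtt := time.Duration((1-qosTuningImpact)*float64(estimate) + qosTuningImpact*float64(median))
	confidence = confidence + (1-confidence)/2
	return rtt, confidence
}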
1318 func (d *Downloader) qosReduceConfidence() { 1319 // If we have a single peer, confidence is always 1 1320 peers := uint64(d.peers.Len()) 1321 if peers == 0 { 1322 // Ensure peer connectivity races don't catch us off guard 1323 return 1324 } 1325 if peers == 1 { 1326 atomic.StoreUint64(&d.rttConfidence, 1000000) 1327 return 1328 } 1329 // If we have a ton of peers, don't drop confidence 1330 if peers >= uint64(qosConfidenceCap) { 1331 return 1332 } 1333 // Otherwise drop the confidence factor 1334 conf := atomic.LoadUint64(&d.rttConfidence) * (peers - 1) / peers 1335 if float64(conf)/1000000 < rttMinConfidence { 1336 conf = uint64(rttMinConfidence * 1000000) 1337 } 1338 atomic.StoreUint64(&d.rttConfidence, conf) 1339 1340 rtt := time.Duration(atomic.LoadUint64(&d.rttEstimate)) 1341 log.WithFields(logrus.Fields{ 1342 "rtt": rtt, 1343 "confidence": float64(conf) / 1000000.0, 1344 "ttl": d.requestTTL(), 1345 }).Debug("Relaxed downloader QoS values") 1346 } 1347 1348 // requestRTT returns the current target round trip time for a download request 1349 // to complete in. 1350 // 1351 // Note, the returned RTT is 0.9 of the actual estimated RTT. The reason is that 1352 // the downloader tries to adapt queries to the RTT, so multiple RTT values can 1353 // be adapted to, but smaller ones are preferred (stabler download stream). 1354 func (d *Downloader) requestRTT() time.Duration { 1355 return time.Duration(atomic.LoadUint64(&d.rttEstimate)) * 9 / 10 1356 } 1357 1358 // requestTTL returns the current timeout allowance for a single download request 1359 // to finish under. 1360 func (d *Downloader) requestTTL() time.Duration { 1361 var ( 1362 rtt = time.Duration(atomic.LoadUint64(&d.rttEstimate)) 1363 conf = float64(atomic.LoadUint64(&d.rttConfidence)) / 1000000.0 1364 ) 1365 ttl := time.Duration(ttlScaling) * time.Duration(float64(rtt)/conf) 1366 if ttl > ttlLimit { 1367 ttl = ttlLimit 1368 } 1369 return ttl 1370 }
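// Worked example of the timeout derivation above (an illustrative sketch, not
// part of the original file; exampleTTL mirrors requestTTL with the estimate
// and confidence passed in explicitly). With the initial estimate
// rttMaxEstimate = 20s and full confidence 1.0, requestRTT targets 0.9*20s = 18s
// and the TTL is 3*20s = 60s, exactly the ttlLimit cap; at confidence 0.5 the
// raw TTL of 3*20s/0.5 = 120s is clamped back to one minute.
func exampleTTL(rtt time.Duration, confidence float64) time.Duration {
	ttl := time.Duration(ttlScaling) * time.Duration(float64(rtt)/confidence)
	if ttl > ttlLimit {
		ttl = ttlLimit
	}
	return ttl
}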