github.com/jeffallen/go-ethereum@v1.1.4-0.20150910155051-571d3236c49c/eth/downloader/downloader.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package downloader contains the manual full chain synchronisation. 18 package downloader 19 20 import ( 21 "bytes" 22 "errors" 23 "math" 24 "math/big" 25 "math/rand" 26 "sync" 27 "sync/atomic" 28 "time" 29 30 "github.com/ethereum/go-ethereum/common" 31 "github.com/ethereum/go-ethereum/core" 32 "github.com/ethereum/go-ethereum/core/types" 33 "github.com/ethereum/go-ethereum/event" 34 "github.com/ethereum/go-ethereum/logger" 35 "github.com/ethereum/go-ethereum/logger/glog" 36 "gopkg.in/fatih/set.v0" 37 ) 38 39 const ( 40 eth60 = 60 // Constant to check for old protocol support 41 eth61 = 61 // Constant to check for new protocol support 42 ) 43 44 var ( 45 MinHashFetch = 512 // Minimum amount of hashes to not consider a peer stalling 46 MaxHashFetch = 512 // Amount of hashes to be fetched per retrieval request 47 MaxBlockFetch = 128 // Amount of blocks to be fetched per retrieval request 48 49 hashTTL = 5 * time.Second // Time it takes for a hash request to time out 50 blockSoftTTL = 3 * time.Second // Request completion threshold for increasing or decreasing a peer's bandwidth 51 blockHardTTL = 3 * blockSoftTTL // Maximum time allowance before a block request is considered expired 52 crossCheckCycle = time.Second // Period after which to check for expired cross checks 53 54 maxQueuedHashes = 256 * 1024 // Maximum number of hashes to queue for import (DOS protection) 55 maxBannedHashes = 4096 // Number of bannable hashes before phasing old ones out 56 maxBlockProcess = 256 // Number of blocks to import at once into the chain 57 ) 58 59 var ( 60 errBusy = errors.New("busy") 61 errUnknownPeer = errors.New("peer is unknown or unhealthy") 62 errBadPeer = errors.New("action from bad peer ignored") 63 errStallingPeer = errors.New("peer is stalling") 64 errBannedHead = errors.New("peer head hash already banned") 65 errNoPeers = errors.New("no peers to keep download active") 66 errPendingQueue = errors.New("pending items in queue") 67 errTimeout = errors.New("timeout") 68 errEmptyHashSet = errors.New("empty hash set by peer") 69 errPeersUnavailable = errors.New("no peers available or all peers tried for block download process") 70 errAlreadyInPool = errors.New("hash already in pool") 71 errInvalidChain = errors.New("retrieved hash chain is invalid") 72 errCrossCheckFailed = errors.New("block cross-check failed") 73 errCancelHashFetch = errors.New("hash fetching canceled (requested)") 74 errCancelBlockFetch = errors.New("block downloading canceled (requested)") 75 errNoSyncActive = errors.New("no sync active") 76 ) 77 78 // hashCheckFn is a callback type for verifying a 
hash's presence in the local chain. 79 type hashCheckFn func(common.Hash) bool 80 81 // blockRetrievalFn is a callback type for retrieving a block from the local chain. 82 type blockRetrievalFn func(common.Hash) *types.Block 83 84 // headRetrievalFn is a callback type for retrieving the head block from the local chain. 85 type headRetrievalFn func() *types.Block 86 87 // chainInsertFn is a callback type to insert a batch of blocks into the local chain. 88 type chainInsertFn func(types.Blocks) (int, error) 89 90 // peerDropFn is a callback type for dropping a peer detected as malicious. 91 type peerDropFn func(id string) 92 93 type blockPack struct { 94 peerId string 95 blocks []*types.Block 96 } 97 98 type hashPack struct { 99 peerId string 100 hashes []common.Hash 101 } 102 103 type crossCheck struct { 104 expire time.Time 105 parent common.Hash 106 } 107 108 type Downloader struct { 109 mux *event.TypeMux 110 111 queue *queue // Scheduler for selecting the hashes to download 112 peers *peerSet // Set of active peers from which download can proceed 113 checks map[common.Hash]*crossCheck // Pending cross checks to verify a hash chain 114 banned *set.Set // Set of hashes we've received and banned 115 116 interrupt int32 // Atomic boolean to signal termination 117 118 // Statistics 119 importStart time.Time // Instance when the last blocks were taken from the cache 120 importQueue []*Block // Previously taken blocks to check import progress 121 importDone int // Number of taken blocks already imported from the last batch 122 importLock sync.Mutex 123 124 // Callbacks 125 hasBlock hashCheckFn // Checks if a block is present in the chain 126 getBlock blockRetrievalFn // Retrieves a block from the chain 127 headBlock headRetrievalFn // Retrieves the head block from the chain 128 insertChain chainInsertFn // Injects a batch of blocks into the chain 129 dropPeer peerDropFn // Drops a peer for misbehaving 130 131 // Status 132 synchroniseMock func(id string, hash common.Hash) error // Replacement for synchronise during testing 133 synchronising int32 134 processing int32 135 notified int32 136 137 // Channels 138 newPeerCh chan *peer 139 hashCh chan hashPack // Channel receiving inbound hashes 140 blockCh chan blockPack // Channel receiving inbound blocks 141 processCh chan bool // Channel to signal the block fetcher of new or finished work 142 143 cancelCh chan struct{} // Channel to cancel mid-flight syncs 144 cancelLock sync.RWMutex // Lock to protect the cancel channel in delivers 145 } 146 147 // Block is an origin-tagged blockchain block. 148 type Block struct { 149 RawBlock *types.Block 150 OriginPeer string 151 } 152 153 // New creates a new downloader to fetch hashes and blocks from remote peers. 
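A minimal sketch of how a caller outside this package might wire up the constructor below; chainman and removePeer are illustrative placeholders for whatever the embedding eth code actually provides, not the real wiring:

    // Hypothetical setup: chainman is assumed to expose methods matching the
    // callback types above, and removePeer to match peerDropFn.
    mux := new(event.TypeMux)
    dl := downloader.New(mux,
        chainman.HasBlock,     // hashCheckFn
        chainman.GetBlock,     // blockRetrievalFn
        chainman.CurrentBlock, // headRetrievalFn
        chainman.InsertChain,  // chainInsertFn
        removePeer,            // peerDropFn: func(id string)
    )
    _ = dl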
154 func New(mux *event.TypeMux, hasBlock hashCheckFn, getBlock blockRetrievalFn, headBlock headRetrievalFn, insertChain chainInsertFn, dropPeer peerDropFn) *Downloader { 155 // Create the base downloader 156 downloader := &Downloader{ 157 mux: mux, 158 queue: newQueue(), 159 peers: newPeerSet(), 160 hasBlock: hasBlock, 161 getBlock: getBlock, 162 headBlock: headBlock, 163 insertChain: insertChain, 164 dropPeer: dropPeer, 165 newPeerCh: make(chan *peer, 1), 166 hashCh: make(chan hashPack, 1), 167 blockCh: make(chan blockPack, 1), 168 processCh: make(chan bool, 1), 169 } 170 // Inject all the known bad hashes 171 downloader.banned = set.New() 172 for hash, _ := range core.BadHashes { 173 downloader.banned.Add(hash) 174 } 175 return downloader 176 } 177 178 // Stats retrieves the current status of the downloader. 179 func (d *Downloader) Stats() (pending int, cached int, importing int, estimate time.Duration) { 180 // Fetch the download status 181 pending, cached = d.queue.Size() 182 183 // Figure out the import progress 184 d.importLock.Lock() 185 defer d.importLock.Unlock() 186 187 for len(d.importQueue) > 0 && d.hasBlock(d.importQueue[0].RawBlock.Hash()) { 188 d.importQueue = d.importQueue[1:] 189 d.importDone++ 190 } 191 importing = len(d.importQueue) 192 193 // Make an estimate on the total sync 194 estimate = 0 195 if d.importDone > 0 { 196 estimate = time.Since(d.importStart) / time.Duration(d.importDone) * time.Duration(pending+cached+importing) 197 } 198 return 199 } 200 201 // Synchronising returns whether the downloader is currently retrieving blocks. 202 func (d *Downloader) Synchronising() bool { 203 return atomic.LoadInt32(&d.synchronising) > 0 204 } 205 206 // RegisterPeer injects a new download peer into the set of block source to be 207 // used for fetching hashes and blocks from. 208 func (d *Downloader) RegisterPeer(id string, version int, head common.Hash, getRelHashes relativeHashFetcherFn, getAbsHashes absoluteHashFetcherFn, getBlocks blockFetcherFn) error { 209 // If the peer wants to send a banned hash, reject 210 if d.banned.Has(head) { 211 glog.V(logger.Debug).Infoln("Register rejected, head hash banned:", id) 212 return errBannedHead 213 } 214 // Otherwise try to construct and register the peer 215 glog.V(logger.Detail).Infoln("Registering peer", id) 216 if err := d.peers.Register(newPeer(id, version, head, getRelHashes, getAbsHashes, getBlocks)); err != nil { 217 glog.V(logger.Error).Infoln("Register failed:", err) 218 return err 219 } 220 return nil 221 } 222 223 // UnregisterPeer remove a peer from the known list, preventing any action from 224 // the specified peer. 225 func (d *Downloader) UnregisterPeer(id string) error { 226 glog.V(logger.Detail).Infoln("Unregistering peer", id) 227 if err := d.peers.Unregister(id); err != nil { 228 glog.V(logger.Error).Infoln("Unregister failed:", err) 229 return err 230 } 231 return nil 232 } 233 234 // Synchronise tries to sync up our local block chain with a remote peer, both 235 // adding various sanity checks as well as wrapping it with various log entries. 
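Before the Synchronise implementation below, a sketch of how a protocol handler might drive peer registration and then kick off a sync; the peer accessors and fetcher methods used here are hypothetical names standing in for the real peer type:

    // Hypothetical handler-side glue: p.ID(), p.Version(), p.Head(), p.Td()
    // and the three Request* methods are placeholders for the actual peer API.
    if err := dl.RegisterPeer(p.ID(), p.Version(), p.Head(),
        p.RequestHashes, p.RequestHashesFromNumber, p.RequestBlocks); err != nil {
        return err
    }
    // Synchronise reports nothing back to the caller: failures are logged and
    // misbehaving peers dropped internally, so it is usually run in its own
    // goroutine.
    go dl.Synchronise(p.ID(), p.Head(), p.Td())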
236 func (d *Downloader) Synchronise(id string, head common.Hash, td *big.Int) { 237 glog.V(logger.Detail).Infof("Attempting synchronisation: %v, head 0x%x, TD %v", id, head[:4], td) 238 239 switch err := d.synchronise(id, head, td); err { 240 case nil: 241 glog.V(logger.Detail).Infof("Synchronisation completed") 242 243 case errBusy: 244 glog.V(logger.Detail).Infof("Synchronisation already in progress") 245 246 case errTimeout, errBadPeer, errStallingPeer, errBannedHead, errEmptyHashSet, errPeersUnavailable, errInvalidChain, errCrossCheckFailed: 247 glog.V(logger.Debug).Infof("Removing peer %v: %v", id, err) 248 d.dropPeer(id) 249 250 case errPendingQueue: 251 glog.V(logger.Debug).Infoln("Synchronisation aborted:", err) 252 253 default: 254 glog.V(logger.Warn).Infof("Synchronisation failed: %v", err) 255 } 256 } 257 258 // synchronise will select the peer and use it for synchronising. If an empty string is given 259 // it will use the best peer possible and synchronize if it's TD is higher than our own. If any of the 260 // checks fail an error will be returned. This method is synchronous 261 func (d *Downloader) synchronise(id string, hash common.Hash, td *big.Int) error { 262 // Mock out the synchonisation if testing 263 if d.synchroniseMock != nil { 264 return d.synchroniseMock(id, hash) 265 } 266 // Make sure only one goroutine is ever allowed past this point at once 267 if !atomic.CompareAndSwapInt32(&d.synchronising, 0, 1) { 268 return errBusy 269 } 270 defer atomic.StoreInt32(&d.synchronising, 0) 271 272 // If the head hash is banned, terminate immediately 273 if d.banned.Has(hash) { 274 return errBannedHead 275 } 276 // Post a user notification of the sync (only once per session) 277 if atomic.CompareAndSwapInt32(&d.notified, 0, 1) { 278 glog.V(logger.Info).Infoln("Block synchronisation started") 279 } 280 // Abort if the queue still contains some leftover data 281 if _, cached := d.queue.Size(); cached > 0 && d.queue.GetHeadBlock() != nil { 282 return errPendingQueue 283 } 284 // Reset the queue and peer set to clean any internal leftover state 285 d.queue.Reset() 286 d.peers.Reset() 287 d.checks = make(map[common.Hash]*crossCheck) 288 289 // Create cancel channel for aborting mid-flight 290 d.cancelLock.Lock() 291 d.cancelCh = make(chan struct{}) 292 d.cancelLock.Unlock() 293 294 // Retrieve the origin peer and initiate the downloading process 295 p := d.peers.Peer(id) 296 if p == nil { 297 return errUnknownPeer 298 } 299 return d.syncWithPeer(p, hash, td) 300 } 301 302 // Has checks if the downloader knows about a particular hash, meaning that its 303 // either already downloaded of pending retrieval. 304 func (d *Downloader) Has(hash common.Hash) bool { 305 return d.queue.Has(hash) 306 } 307 308 // syncWithPeer starts a block synchronization based on the hash chain from the 309 // specified peer and head hash. 
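The eth/61 path in syncWithPeer below runs the hash and block fetchers concurrently and aborts both if either fails. A self-contained sketch of that fan-in pattern, with fetchA, fetchB and cancel as placeholders:

    func runPair(fetchA, fetchB func() error, cancel func()) error {
        errc := make(chan error, 2) // buffered so neither goroutine blocks on exit
        go func() { errc <- fetchA() }()
        go func() { errc <- fetchB() }()

        // Whichever fetcher finishes first decides: on error, cancel and drain
        // the other so no goroutine leaks, then report the failure.
        if err := <-errc; err != nil {
            cancel()
            <-errc
            return err
        }
        return <-errc // first succeeded, result is whatever the second says
    }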
310 func (d *Downloader) syncWithPeer(p *peer, hash common.Hash, td *big.Int) (err error) { 311 d.mux.Post(StartEvent{}) 312 defer func() { 313 // reset on error 314 if err != nil { 315 d.cancel() 316 d.mux.Post(FailedEvent{err}) 317 } else { 318 d.mux.Post(DoneEvent{}) 319 } 320 }() 321 322 glog.V(logger.Debug).Infof("Synchronizing with the network using: %s, eth/%d", p.id, p.version) 323 switch p.version { 324 case eth60: 325 // Old eth/60 version, use reverse hash retrieval algorithm 326 if err = d.fetchHashes60(p, hash); err != nil { 327 return err 328 } 329 if err = d.fetchBlocks60(); err != nil { 330 return err 331 } 332 case eth61: 333 // New eth/61, use forward, concurrent hash and block retrieval algorithm 334 number, err := d.findAncestor(p) 335 if err != nil { 336 return err 337 } 338 errc := make(chan error, 2) 339 go func() { errc <- d.fetchHashes(p, td, number+1) }() 340 go func() { errc <- d.fetchBlocks(number + 1) }() 341 342 // If any fetcher fails, cancel the other 343 if err := <-errc; err != nil { 344 d.cancel() 345 <-errc 346 return err 347 } 348 return <-errc 349 350 default: 351 // Something very wrong, stop right here 352 glog.V(logger.Error).Infof("Unsupported eth protocol: %d", p.version) 353 return errBadPeer 354 } 355 glog.V(logger.Debug).Infoln("Synchronization completed") 356 357 return nil 358 } 359 360 // cancel cancels all of the operations and resets the queue. It returns true 361 // if the cancel operation was completed. 362 func (d *Downloader) cancel() { 363 // Close the current cancel channel 364 d.cancelLock.Lock() 365 if d.cancelCh != nil { 366 select { 367 case <-d.cancelCh: 368 // Channel was already closed 369 default: 370 close(d.cancelCh) 371 } 372 } 373 d.cancelLock.Unlock() 374 375 // Reset the queue 376 d.queue.Reset() 377 } 378 379 // Terminate interrupts the downloader, canceling all pending operations. 380 func (d *Downloader) Terminate() { 381 atomic.StoreInt32(&d.interrupt, 1) 382 d.cancel() 383 } 384 385 // fetchHashes60 starts retrieving hashes backwards from a specific peer and hash, 386 // up until it finds a common ancestor. If the source peer times out, alternative 387 // ones are tried for continuation. 388 func (d *Downloader) fetchHashes60(p *peer, h common.Hash) error { 389 var ( 390 start = time.Now() 391 active = p // active peer will help determine the current active peer 392 head = common.Hash{} // common and last hash 393 394 timeout = time.NewTimer(0) // timer to dump a non-responsive active peer 395 attempted = make(map[string]bool) // attempted peers will help with retries 396 crossTicker = time.NewTicker(crossCheckCycle) // ticker to periodically check expired cross checks 397 ) 398 defer crossTicker.Stop() 399 defer timeout.Stop() 400 401 glog.V(logger.Debug).Infof("Downloading hashes (%x) from %s", h[:4], p.id) 402 <-timeout.C // timeout channel should be initially empty. 403 404 getHashes := func(from common.Hash) { 405 go active.getRelHashes(from) 406 timeout.Reset(hashTTL) 407 } 408 409 // Add the hash to the queue, and start hash retrieval. 
410 d.queue.Insert([]common.Hash{h}, false) 411 getHashes(h) 412 413 attempted[p.id] = true 414 for finished := false; !finished; { 415 select { 416 case <-d.cancelCh: 417 return errCancelHashFetch 418 419 case hashPack := <-d.hashCh: 420 // Make sure the active peer is giving us the hashes 421 if hashPack.peerId != active.id { 422 glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)", hashPack.peerId) 423 break 424 } 425 timeout.Stop() 426 427 // Make sure the peer actually gave something valid 428 if len(hashPack.hashes) == 0 { 429 glog.V(logger.Debug).Infof("Peer (%s) responded with empty hash set", active.id) 430 return errEmptyHashSet 431 } 432 for index, hash := range hashPack.hashes { 433 if d.banned.Has(hash) { 434 glog.V(logger.Debug).Infof("Peer (%s) sent a known invalid chain", active.id) 435 436 d.queue.Insert(hashPack.hashes[:index+1], false) 437 if err := d.banBlocks(active.id, hash); err != nil { 438 glog.V(logger.Debug).Infof("Failed to ban batch of blocks: %v", err) 439 } 440 return errInvalidChain 441 } 442 } 443 // Determine if we're done fetching hashes (queue up all pending), and continue if not done 444 done, index := false, 0 445 for index, head = range hashPack.hashes { 446 if d.hasBlock(head) || d.queue.GetBlock(head) != nil { 447 glog.V(logger.Debug).Infof("Found common hash %x", head[:4]) 448 hashPack.hashes = hashPack.hashes[:index] 449 done = true 450 break 451 } 452 } 453 // Insert all the new hashes, but only continue if got something useful 454 inserts := d.queue.Insert(hashPack.hashes, false) 455 if len(inserts) == 0 && !done { 456 glog.V(logger.Debug).Infof("Peer (%s) responded with stale hashes", active.id) 457 return errBadPeer 458 } 459 if !done { 460 // Check that the peer is not stalling the sync 461 if len(inserts) < MinHashFetch { 462 return errStallingPeer 463 } 464 // Try and fetch a random block to verify the hash batch 465 // Skip the last hash as the cross check races with the next hash fetch 466 cross := rand.Intn(len(inserts) - 1) 467 origin, parent := inserts[cross], inserts[cross+1] 468 glog.V(logger.Detail).Infof("Cross checking (%s) with %x/%x", active.id, origin, parent) 469 470 d.checks[origin] = &crossCheck{ 471 expire: time.Now().Add(blockSoftTTL), 472 parent: parent, 473 } 474 go active.getBlocks([]common.Hash{origin}) 475 476 // Also fetch a fresh batch of hashes 477 getHashes(head) 478 continue 479 } 480 // We're done, prepare the download cache and proceed pulling the blocks 481 offset := uint64(0) 482 if block := d.getBlock(head); block != nil { 483 offset = block.NumberU64() + 1 484 } 485 d.queue.Prepare(offset) 486 finished = true 487 488 case blockPack := <-d.blockCh: 489 // Cross check the block with the random verifications 490 if blockPack.peerId != active.id || len(blockPack.blocks) != 1 { 491 continue 492 } 493 block := blockPack.blocks[0] 494 if check, ok := d.checks[block.Hash()]; ok { 495 if block.ParentHash() != check.parent { 496 return errCrossCheckFailed 497 } 498 delete(d.checks, block.Hash()) 499 } 500 501 case <-crossTicker.C: 502 // Iterate over all the cross checks and fail the hash chain if they're not verified 503 for hash, check := range d.checks { 504 if time.Now().After(check.expire) { 505 glog.V(logger.Debug).Infof("Cross check timeout for %x", hash) 506 return errCrossCheckFailed 507 } 508 } 509 510 case <-timeout.C: 511 glog.V(logger.Debug).Infof("Peer (%s) didn't respond in time for hash request", p.id) 512 513 var p *peer // p will be set if a peer can be found 514 // Attempt to find a 
new peer by checking inclusion of peers best hash in our 515 // already fetched hash list. This can't guarantee 100% correctness but does 516 // a fair job. This is always either correct or false incorrect. 517 for _, peer := range d.peers.AllPeers() { 518 if d.queue.Has(peer.head) && !attempted[peer.id] { 519 p = peer 520 break 521 } 522 } 523 // if all peers have been tried, abort the process entirely or if the hash is 524 // the zero hash. 525 if p == nil || (head == common.Hash{}) { 526 return errTimeout 527 } 528 // set p to the active peer. this will invalidate any hashes that may be returned 529 // by our previous (delayed) peer. 530 active = p 531 getHashes(head) 532 glog.V(logger.Debug).Infof("Hash fetching switched to new peer(%s)", p.id) 533 } 534 } 535 glog.V(logger.Debug).Infof("Downloaded hashes (%d) in %v", d.queue.Pending(), time.Since(start)) 536 537 return nil 538 } 539 540 // fetchBlocks60 iteratively downloads the entire schedules block-chain, taking 541 // any available peers, reserving a chunk of blocks for each, wait for delivery 542 // and periodically checking for timeouts. 543 func (d *Downloader) fetchBlocks60() error { 544 glog.V(logger.Debug).Infoln("Downloading", d.queue.Pending(), "block(s)") 545 start := time.Now() 546 547 // Start a ticker to continue throttled downloads and check for bad peers 548 ticker := time.NewTicker(20 * time.Millisecond) 549 defer ticker.Stop() 550 551 out: 552 for { 553 select { 554 case <-d.cancelCh: 555 return errCancelBlockFetch 556 557 case <-d.hashCh: 558 // Out of bounds hashes received, ignore them 559 560 case blockPack := <-d.blockCh: 561 // Short circuit if it's a stale cross check 562 if len(blockPack.blocks) == 1 { 563 block := blockPack.blocks[0] 564 if _, ok := d.checks[block.Hash()]; ok { 565 delete(d.checks, block.Hash()) 566 break 567 } 568 } 569 // If the peer was previously banned and failed to deliver it's pack 570 // in a reasonable time frame, ignore it's message. 571 if peer := d.peers.Peer(blockPack.peerId); peer != nil { 572 // Deliver the received chunk of blocks, and demote in case of errors 573 err := d.queue.Deliver(blockPack.peerId, blockPack.blocks) 574 switch err { 575 case nil: 576 // If no blocks were delivered, demote the peer (need the delivery above) 577 if len(blockPack.blocks) == 0 { 578 peer.Demote() 579 peer.SetIdle() 580 glog.V(logger.Detail).Infof("%s: no blocks delivered", peer) 581 break 582 } 583 // All was successful, promote the peer and potentially start processing 584 peer.Promote() 585 peer.SetIdle() 586 glog.V(logger.Detail).Infof("%s: delivered %d blocks", peer, len(blockPack.blocks)) 587 go d.process() 588 589 case errInvalidChain: 590 // The hash chain is invalid (blocks are not ordered properly), abort 591 return err 592 593 case errNoFetchesPending: 594 // Peer probably timed out with its delivery but came through 595 // in the end, demote, but allow to to pull from this peer. 596 peer.Demote() 597 peer.SetIdle() 598 glog.V(logger.Detail).Infof("%s: out of bound delivery", peer) 599 600 case errStaleDelivery: 601 // Delivered something completely else than requested, usually 602 // caused by a timeout and delivery during a new sync cycle. 603 // Don't set it to idle as the original request should still be 604 // in flight. 
605 peer.Demote() 606 glog.V(logger.Detail).Infof("%s: stale delivery", peer) 607 608 default: 609 // Peer did something semi-useful, demote but keep it around 610 peer.Demote() 611 peer.SetIdle() 612 glog.V(logger.Detail).Infof("%s: delivery partially failed: %v", peer, err) 613 go d.process() 614 } 615 } 616 617 case <-ticker.C: 618 // Short circuit if we lost all our peers 619 if d.peers.Len() == 0 { 620 return errNoPeers 621 } 622 // Check for block request timeouts and demote the responsible peers 623 badPeers := d.queue.Expire(blockHardTTL) 624 for _, pid := range badPeers { 625 if peer := d.peers.Peer(pid); peer != nil { 626 peer.Demote() 627 glog.V(logger.Detail).Infof("%s: block delivery timeout", peer) 628 } 629 } 630 // If there are unrequested hashes left start fetching from the available peers 631 if d.queue.Pending() > 0 { 632 // Throttle the download if block cache is full and waiting processing 633 if d.queue.Throttle() { 634 break 635 } 636 // Send a download request to all idle peers, until throttled 637 idlePeers := d.peers.IdlePeers() 638 for _, peer := range idlePeers { 639 // Short circuit if throttling activated since above 640 if d.queue.Throttle() { 641 break 642 } 643 // Get a possible chunk. If nil is returned no chunk 644 // could be returned due to no hashes available. 645 request := d.queue.Reserve(peer, peer.Capacity()) 646 if request == nil { 647 continue 648 } 649 if glog.V(logger.Detail) { 650 glog.Infof("%s: requesting %d blocks", peer, len(request.Hashes)) 651 } 652 // Fetch the chunk and check for error. If the peer was somehow 653 // already fetching a chunk due to a bug, it will be returned to 654 // the queue 655 if err := peer.Fetch(request); err != nil { 656 glog.V(logger.Error).Infof("Peer %s received double work", peer.id) 657 d.queue.Cancel(request) 658 } 659 } 660 // Make sure that we have peers available for fetching. If all peers have been tried 661 // and all failed throw an error 662 if d.queue.InFlight() == 0 { 663 return errPeersUnavailable 664 } 665 666 } else if d.queue.InFlight() == 0 { 667 // When there are no more queue and no more in flight, We can 668 // safely assume we're done. Another part of the process will check 669 // for parent errors and will re-request anything that's missing 670 break out 671 } 672 } 673 } 674 glog.V(logger.Detail).Infoln("Downloaded block(s) in", time.Since(start)) 675 return nil 676 } 677 678 // findAncestor tries to locate the common ancestor block of the local chain and 679 // a remote peers blockchain. In the general case when our node was in sync and 680 // on the correct chain, checking the top N blocks should already get us a match. 681 // In the rare scenario when we ended up on a long soft fork (i.e. none of the 682 // head blocks match), we do a binary search to find the common ancestor. 
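A compact restatement of the binary-search fallback that findAncestor below performs once the head probe finds no overlap; hasLocally stands in for the request/response round trip that checks whether the block the peer reports at a given number is already in our chain:

    func searchCommonAncestor(head uint64, hasLocally func(number uint64) bool) uint64 {
        start, end := uint64(0), head
        for start+1 < end {
            check := (start + end) / 2
            if hasLocally(check) {
                start = check // ancestor is at or above the midpoint
            } else {
                end = check // ancestor is strictly below the midpoint
            }
        }
        return start
    }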
683 func (d *Downloader) findAncestor(p *peer) (uint64, error) { 684 glog.V(logger.Debug).Infof("%v: looking for common ancestor", p) 685 686 // Request out head blocks to short circuit ancestor location 687 head := d.headBlock().NumberU64() 688 from := int64(head) - int64(MaxHashFetch) 689 if from < 0 { 690 from = 0 691 } 692 go p.getAbsHashes(uint64(from), MaxHashFetch) 693 694 // Wait for the remote response to the head fetch 695 number, hash := uint64(0), common.Hash{} 696 timeout := time.After(hashTTL) 697 698 for finished := false; !finished; { 699 select { 700 case <-d.cancelCh: 701 return 0, errCancelHashFetch 702 703 case hashPack := <-d.hashCh: 704 // Discard anything not from the origin peer 705 if hashPack.peerId != p.id { 706 glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)", hashPack.peerId) 707 break 708 } 709 // Make sure the peer actually gave something valid 710 hashes := hashPack.hashes 711 if len(hashes) == 0 { 712 glog.V(logger.Debug).Infof("%v: empty head hash set", p) 713 return 0, errEmptyHashSet 714 } 715 // Check if a common ancestor was found 716 finished = true 717 for i := len(hashes) - 1; i >= 0; i-- { 718 if d.hasBlock(hashes[i]) { 719 number, hash = uint64(from)+uint64(i), hashes[i] 720 break 721 } 722 } 723 724 case <-d.blockCh: 725 // Out of bounds blocks received, ignore them 726 727 case <-timeout: 728 glog.V(logger.Debug).Infof("%v: head hash timeout", p) 729 return 0, errTimeout 730 } 731 } 732 // If the head fetch already found an ancestor, return 733 if !common.EmptyHash(hash) { 734 glog.V(logger.Debug).Infof("%v: common ancestor: #%d [%x]", p, number, hash[:4]) 735 return number, nil 736 } 737 // Ancestor not found, we need to binary search over our chain 738 start, end := uint64(0), head 739 for start+1 < end { 740 // Split our chain interval in two, and request the hash to cross check 741 check := (start + end) / 2 742 743 timeout := time.After(hashTTL) 744 go p.getAbsHashes(uint64(check), 1) 745 746 // Wait until a reply arrives to this request 747 for arrived := false; !arrived; { 748 select { 749 case <-d.cancelCh: 750 return 0, errCancelHashFetch 751 752 case hashPack := <-d.hashCh: 753 // Discard anything not from the origin peer 754 if hashPack.peerId != p.id { 755 glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)", hashPack.peerId) 756 break 757 } 758 // Make sure the peer actually gave something valid 759 hashes := hashPack.hashes 760 if len(hashes) != 1 { 761 glog.V(logger.Debug).Infof("%v: invalid search hash set (%d)", p, len(hashes)) 762 return 0, errBadPeer 763 } 764 arrived = true 765 766 // Modify the search interval based on the response 767 block := d.getBlock(hashes[0]) 768 if block == nil { 769 end = check 770 break 771 } 772 if block.NumberU64() != check { 773 glog.V(logger.Debug).Infof("%v: non requested hash #%d [%x], instead of #%d", p, block.NumberU64(), block.Hash().Bytes()[:4], check) 774 return 0, errBadPeer 775 } 776 start = check 777 778 case <-d.blockCh: 779 // Out of bounds blocks received, ignore them 780 781 case <-timeout: 782 glog.V(logger.Debug).Infof("%v: search hash timeout", p) 783 return 0, errTimeout 784 } 785 } 786 } 787 return start, nil 788 } 789 790 // fetchHashes keeps retrieving hashes from the requested number, until no more 791 // are returned, potentially throttling on the way. 
792 func (d *Downloader) fetchHashes(p *peer, td *big.Int, from uint64) error { 793 glog.V(logger.Debug).Infof("%v: downloading hashes from #%d", p, from) 794 795 // Create a timeout timer, and the associated hash fetcher 796 timeout := time.NewTimer(0) // timer to dump a non-responsive active peer 797 <-timeout.C // timeout channel should be initially empty 798 defer timeout.Stop() 799 800 getHashes := func(from uint64) { 801 glog.V(logger.Detail).Infof("%v: fetching %d hashes from #%d", p, MaxHashFetch, from) 802 803 go p.getAbsHashes(from, MaxHashFetch) 804 timeout.Reset(hashTTL) 805 } 806 // Start pulling hashes, until all are exhausted 807 getHashes(from) 808 gotHashes := false 809 810 for { 811 select { 812 case <-d.cancelCh: 813 return errCancelHashFetch 814 815 case hashPack := <-d.hashCh: 816 // Make sure the active peer is giving us the hashes 817 if hashPack.peerId != p.id { 818 glog.V(logger.Debug).Infof("Received hashes from incorrect peer(%s)", hashPack.peerId) 819 break 820 } 821 timeout.Stop() 822 823 // If no more hashes are inbound, notify the block fetcher and return 824 if len(hashPack.hashes) == 0 { 825 glog.V(logger.Debug).Infof("%v: no available hashes", p) 826 827 select { 828 case d.processCh <- false: 829 case <-d.cancelCh: 830 } 831 // If no hashes were retrieved at all, the peer violated it's TD promise that it had a 832 // better chain compared to ours. The only exception is if it's promised blocks were 833 // already imported by other means (e.g. fecher): 834 // 835 // R <remote peer>, L <local node>: Both at block 10 836 // R: Mine block 11, and propagate it to L 837 // L: Queue block 11 for import 838 // L: Notice that R's head and TD increased compared to ours, start sync 839 // L: Import of block 11 finishes 840 // L: Sync begins, and finds common ancestor at 11 841 // L: Request new hashes up from 11 (R's TD was higher, it must have something) 842 // R: Nothing to give 843 if !gotHashes && td.Cmp(d.headBlock().Td) > 0 { 844 return errStallingPeer 845 } 846 return nil 847 } 848 gotHashes = true 849 850 // Otherwise insert all the new hashes, aborting in case of junk 851 glog.V(logger.Detail).Infof("%v: inserting %d hashes from #%d", p, len(hashPack.hashes), from) 852 853 inserts := d.queue.Insert(hashPack.hashes, true) 854 if len(inserts) != len(hashPack.hashes) { 855 glog.V(logger.Debug).Infof("%v: stale hashes", p) 856 return errBadPeer 857 } 858 // Notify the block fetcher of new hashes, but stop if queue is full 859 cont := d.queue.Pending() < maxQueuedHashes 860 select { 861 case d.processCh <- cont: 862 default: 863 } 864 if !cont { 865 return nil 866 } 867 // Queue not yet full, fetch the next batch 868 from += uint64(len(hashPack.hashes)) 869 getHashes(from) 870 871 case <-timeout.C: 872 glog.V(logger.Debug).Infof("%v: hash request timed out", p) 873 return errTimeout 874 } 875 } 876 } 877 878 // fetchBlocks iteratively downloads the scheduled hashes, taking any available 879 // peers, reserving a chunk of blocks for each, waiting for delivery and also 880 // periodically checking for timeouts. 
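fetchBlocks below funnels several event sources (block deliveries, hash-fetcher signals and a ticker) into one buffered wake-up channel so each scheduling pass runs at most once per pending notification. A minimal sketch of that coalescing idiom:

    update := make(chan struct{}, 1)

    // Producers signal without ever blocking: if a wake-up is already queued,
    // the duplicate is simply dropped.
    notify := func() {
        select {
        case update <- struct{}{}:
        default:
        }
    }

    notify() // e.g. on a block delivery
    notify() // e.g. on a ticker tick; coalesced with the one above
    <-update // consumer performs a single expire/reserve/fetch pass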
881 func (d *Downloader) fetchBlocks(from uint64) error { 882 glog.V(logger.Debug).Infof("Downloading blocks from #%d", from) 883 defer glog.V(logger.Debug).Infof("Block download terminated") 884 885 // Create a timeout timer for scheduling expiration tasks 886 ticker := time.NewTicker(100 * time.Millisecond) 887 defer ticker.Stop() 888 889 update := make(chan struct{}, 1) 890 891 // Prepare the queue and fetch blocks until the hash fetcher's done 892 d.queue.Prepare(from) 893 finished := false 894 895 for { 896 select { 897 case <-d.cancelCh: 898 return errCancelBlockFetch 899 900 case blockPack := <-d.blockCh: 901 // If the peer was previously banned and failed to deliver it's pack 902 // in a reasonable time frame, ignore it's message. 903 if peer := d.peers.Peer(blockPack.peerId); peer != nil { 904 // Deliver the received chunk of blocks, and demote in case of errors 905 err := d.queue.Deliver(blockPack.peerId, blockPack.blocks) 906 switch err { 907 case nil: 908 // If no blocks were delivered, demote the peer (need the delivery above) 909 if len(blockPack.blocks) == 0 { 910 peer.Demote() 911 peer.SetIdle() 912 glog.V(logger.Detail).Infof("%s: no blocks delivered", peer) 913 break 914 } 915 // All was successful, promote the peer and potentially start processing 916 peer.Promote() 917 peer.SetIdle() 918 glog.V(logger.Detail).Infof("%s: delivered %d blocks", peer, len(blockPack.blocks)) 919 go d.process() 920 921 case errInvalidChain: 922 // The hash chain is invalid (blocks are not ordered properly), abort 923 return err 924 925 case errNoFetchesPending: 926 // Peer probably timed out with its delivery but came through 927 // in the end, demote, but allow to to pull from this peer. 928 peer.Demote() 929 peer.SetIdle() 930 glog.V(logger.Detail).Infof("%s: out of bound delivery", peer) 931 932 case errStaleDelivery: 933 // Delivered something completely else than requested, usually 934 // caused by a timeout and delivery during a new sync cycle. 935 // Don't set it to idle as the original request should still be 936 // in flight. 
937 peer.Demote() 938 glog.V(logger.Detail).Infof("%s: stale delivery", peer) 939 940 default: 941 // Peer did something semi-useful, demote but keep it around 942 peer.Demote() 943 peer.SetIdle() 944 glog.V(logger.Detail).Infof("%s: delivery partially failed: %v", peer, err) 945 go d.process() 946 } 947 } 948 // Blocks arrived, try to update the progress 949 select { 950 case update <- struct{}{}: 951 default: 952 } 953 954 case cont := <-d.processCh: 955 // The hash fetcher sent a continuation flag, check if it's done 956 if !cont { 957 finished = true 958 } 959 // Hashes arrive, try to update the progress 960 select { 961 case update <- struct{}{}: 962 default: 963 } 964 965 case <-ticker.C: 966 // Sanity check update the progress 967 select { 968 case update <- struct{}{}: 969 default: 970 } 971 972 case <-update: 973 // Short circuit if we lost all our peers 974 if d.peers.Len() == 0 { 975 return errNoPeers 976 } 977 // Check for block request timeouts and demote the responsible peers 978 for _, pid := range d.queue.Expire(blockHardTTL) { 979 if peer := d.peers.Peer(pid); peer != nil { 980 peer.Demote() 981 glog.V(logger.Detail).Infof("%s: block delivery timeout", peer) 982 } 983 } 984 // If there's noting more to fetch, wait or terminate 985 if d.queue.Pending() == 0 { 986 if d.queue.InFlight() == 0 && finished { 987 glog.V(logger.Debug).Infof("Block fetching completed") 988 return nil 989 } 990 break 991 } 992 // Send a download request to all idle peers, until throttled 993 for _, peer := range d.peers.IdlePeers() { 994 // Short circuit if throttling activated 995 if d.queue.Throttle() { 996 break 997 } 998 // Reserve a chunk of hashes for a peer. A nil can mean either that 999 // no more hashes are available, or that the peer is known not to 1000 // have them. 1001 request := d.queue.Reserve(peer, peer.Capacity()) 1002 if request == nil { 1003 continue 1004 } 1005 if glog.V(logger.Detail) { 1006 glog.Infof("%s: requesting %d blocks", peer, len(request.Hashes)) 1007 } 1008 // Fetch the chunk and make sure any errors return the hashes to the queue 1009 if err := peer.Fetch(request); err != nil { 1010 glog.V(logger.Error).Infof("%v: fetch failed, rescheduling", peer) 1011 d.queue.Cancel(request) 1012 } 1013 } 1014 // Make sure that we have peers available for fetching. If all peers have been tried 1015 // and all failed throw an error 1016 if !d.queue.Throttle() && d.queue.InFlight() == 0 { 1017 return errPeersUnavailable 1018 } 1019 } 1020 } 1021 } 1022 1023 // banBlocks retrieves a batch of blocks from a peer feeding us invalid hashes, 1024 // and bans the head of the retrieved batch. 1025 // 1026 // This method only fetches one single batch as the goal is not ban an entire 1027 // (potentially long) invalid chain - wasting a lot of time in the meanwhile -, 1028 // but rather to gradually build up a blacklist if the peer keeps reconnecting. 
1029 func (d *Downloader) banBlocks(peerId string, head common.Hash) error { 1030 glog.V(logger.Debug).Infof("Banning a batch out of %d blocks from %s", d.queue.Pending(), peerId) 1031 1032 // Ask the peer being banned for a batch of blocks from the banning point 1033 peer := d.peers.Peer(peerId) 1034 if peer == nil { 1035 return nil 1036 } 1037 request := d.queue.Reserve(peer, MaxBlockFetch) 1038 if request == nil { 1039 return nil 1040 } 1041 if err := peer.Fetch(request); err != nil { 1042 return err 1043 } 1044 // Wait a bit for the reply to arrive, and ban if done so 1045 timeout := time.After(blockHardTTL) 1046 for { 1047 select { 1048 case <-d.cancelCh: 1049 return errCancelBlockFetch 1050 1051 case <-timeout: 1052 return errTimeout 1053 1054 case <-d.hashCh: 1055 // Out of bounds hashes received, ignore them 1056 1057 case blockPack := <-d.blockCh: 1058 blocks := blockPack.blocks 1059 1060 // Short circuit if it's a stale cross check 1061 if len(blocks) == 1 { 1062 block := blocks[0] 1063 if _, ok := d.checks[block.Hash()]; ok { 1064 delete(d.checks, block.Hash()) 1065 break 1066 } 1067 } 1068 // Short circuit if it's not from the peer being banned 1069 if blockPack.peerId != peerId { 1070 break 1071 } 1072 // Short circuit if no blocks were returned 1073 if len(blocks) == 0 { 1074 return errors.New("no blocks returned to ban") 1075 } 1076 // Reconstruct the original chain order and ensure we're banning the correct blocks 1077 types.BlockBy(types.Number).Sort(blocks) 1078 if bytes.Compare(blocks[0].Hash().Bytes(), head.Bytes()) != 0 { 1079 return errors.New("head block not the banned one") 1080 } 1081 index := 0 1082 for _, block := range blocks[1:] { 1083 if bytes.Compare(block.ParentHash().Bytes(), blocks[index].Hash().Bytes()) != 0 { 1084 break 1085 } 1086 index++ 1087 } 1088 // Ban the head hash and phase out any excess 1089 d.banned.Add(blocks[index].Hash()) 1090 for d.banned.Size() > maxBannedHashes { 1091 var evacuate common.Hash 1092 1093 d.banned.Each(func(item interface{}) bool { 1094 // Skip any hard coded bans 1095 if core.BadHashes[item.(common.Hash)] { 1096 return true 1097 } 1098 evacuate = item.(common.Hash) 1099 return false 1100 }) 1101 d.banned.Remove(evacuate) 1102 } 1103 glog.V(logger.Debug).Infof("Banned %d blocks from: %s", index+1, peerId) 1104 return nil 1105 } 1106 } 1107 } 1108 1109 // process takes blocks from the queue and tries to import them into the chain. 1110 // 1111 // The algorithmic flow is as follows: 1112 // - The `processing` flag is swapped to 1 to ensure singleton access 1113 // - The current `cancel` channel is retrieved to detect sync abortions 1114 // - Blocks are iteratively taken from the cache and inserted into the chain 1115 // - When the cache becomes empty, insertion stops 1116 // - The `processing` flag is swapped back to 0 1117 // - A post-exit check is made whether new blocks became available 1118 // - This step is important: it handles a potential race condition between 1119 // checking for no more work, and releasing the processing "mutex". In 1120 // between these state changes, a block may have arrived, but a processing 1121 // attempt denied, so we need to re-enter to ensure the block isn't left 1122 // to idle in the cache. 1123 func (d *Downloader) process() { 1124 // Make sure only one goroutine is ever allowed to process blocks at once 1125 if !atomic.CompareAndSwapInt32(&d.processing, 0, 1) { 1126 return 1127 } 1128 // If the processor just exited, but there are freshly pending items, try to 1129 // reenter. 
This is needed because the goroutine spinned up for processing 1130 // the fresh blocks might have been rejected entry to to this present thread 1131 // not yet releasing the `processing` state. 1132 defer func() { 1133 if atomic.LoadInt32(&d.interrupt) == 0 && d.queue.GetHeadBlock() != nil { 1134 d.process() 1135 } 1136 }() 1137 // Release the lock upon exit (note, before checking for reentry!), and set 1138 // the import statistics to zero. 1139 defer func() { 1140 d.importLock.Lock() 1141 d.importQueue = nil 1142 d.importDone = 0 1143 d.importLock.Unlock() 1144 1145 atomic.StoreInt32(&d.processing, 0) 1146 }() 1147 // Repeat the processing as long as there are blocks to import 1148 for { 1149 // Fetch the next batch of blocks 1150 blocks := d.queue.TakeBlocks() 1151 if len(blocks) == 0 { 1152 return 1153 } 1154 // Reset the import statistics 1155 d.importLock.Lock() 1156 d.importStart = time.Now() 1157 d.importQueue = blocks 1158 d.importDone = 0 1159 d.importLock.Unlock() 1160 1161 // Actually import the blocks 1162 glog.V(logger.Debug).Infof("Inserting chain with %d blocks (#%v - #%v)\n", len(blocks), blocks[0].RawBlock.Number(), blocks[len(blocks)-1].RawBlock.Number()) 1163 for len(blocks) != 0 { 1164 // Check for any termination requests 1165 if atomic.LoadInt32(&d.interrupt) == 1 { 1166 return 1167 } 1168 // Retrieve the first batch of blocks to insert 1169 max := int(math.Min(float64(len(blocks)), float64(maxBlockProcess))) 1170 raw := make(types.Blocks, 0, max) 1171 for _, block := range blocks[:max] { 1172 raw = append(raw, block.RawBlock) 1173 } 1174 // Try to inset the blocks, drop the originating peer if there's an error 1175 index, err := d.insertChain(raw) 1176 if err != nil { 1177 glog.V(logger.Debug).Infof("Block #%d import failed: %v", raw[index].NumberU64(), err) 1178 d.dropPeer(blocks[index].OriginPeer) 1179 d.cancel() 1180 return 1181 } 1182 blocks = blocks[max:] 1183 } 1184 } 1185 } 1186 1187 // DeliverBlocks injects a new batch of blocks received from a remote node. 1188 // This is usually invoked through the BlocksMsg by the protocol handler. 1189 func (d *Downloader) DeliverBlocks(id string, blocks []*types.Block) error { 1190 // Make sure the downloader is active 1191 if atomic.LoadInt32(&d.synchronising) == 0 { 1192 return errNoSyncActive 1193 } 1194 // Deliver or abort if the sync is canceled while queuing 1195 d.cancelLock.RLock() 1196 cancel := d.cancelCh 1197 d.cancelLock.RUnlock() 1198 1199 select { 1200 case d.blockCh <- blockPack{id, blocks}: 1201 return nil 1202 1203 case <-cancel: 1204 return errNoSyncActive 1205 } 1206 } 1207 1208 // DeliverHashes injects a new batch of hashes received from a remote node into 1209 // the download schedule. This is usually invoked through the BlockHashesMsg by 1210 // the protocol handler. 1211 func (d *Downloader) DeliverHashes(id string, hashes []common.Hash) error { 1212 // Make sure the downloader is active 1213 if atomic.LoadInt32(&d.synchronising) == 0 { 1214 return errNoSyncActive 1215 } 1216 // Deliver or abort if the sync is canceled while queuing 1217 d.cancelLock.RLock() 1218 cancel := d.cancelCh 1219 d.cancelLock.RUnlock() 1220 1221 select { 1222 case d.hashCh <- hashPack{id, hashes}: 1223 return nil 1224 1225 case <-cancel: 1226 return errNoSyncActive 1227 } 1228 }
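For completeness, a sketch of the handler-side delivery calls; decoding of the BlocksMsg/BlockHashesMsg payloads is elided and the surrounding handler code (p.ID(), hashes, blocks) is hypothetical:

    // Hand inbound payloads to the active sync, if any. When no synchronisation
    // is running (or it was cancelled while queuing), both calls return
    // errNoSyncActive, which a handler can safely log and ignore.
    if err := dl.DeliverHashes(p.ID(), hashes); err != nil {
        glog.V(logger.Debug).Infoln("hash delivery ignored:", err)
    }
    if err := dl.DeliverBlocks(p.ID(), blocks); err != nil {
        glog.V(logger.Debug).Infoln("block delivery ignored:", err)
    }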