github.com/KYVENetwork/cometbft/v38@v38.0.3/blocksync/pool.go

package blocksync

import (
	"errors"
	"fmt"
	"math"
	"sort"
	"sync/atomic"
	"time"

	flow "github.com/KYVENetwork/cometbft/v38/libs/flowrate"
	"github.com/KYVENetwork/cometbft/v38/libs/log"
	"github.com/KYVENetwork/cometbft/v38/libs/service"
	cmtsync "github.com/KYVENetwork/cometbft/v38/libs/sync"
	"github.com/KYVENetwork/cometbft/v38/p2p"
	"github.com/KYVENetwork/cometbft/v38/types"
)

/*
eg, L = latency = 0.1s
	P = num peers = 10
	FN = num full nodes
	BS = 1kB block size
	CB = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s
	B/S = CB/P/BS = 12.8 blocks/s

	12.8 * 0.1 = 1.28 blocks on conn
*/

const (
	requestIntervalMS         = 2
	maxPendingRequestsPerPeer = 20
	requestRetrySeconds       = 30

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Based on the experiments with [Osmosis](https://osmosis.zone/), the
	// minimum rate could be as high as 500 KB/s. However, we're setting it to
	// 128 KB/s for now to be conservative.
	minRecvRate = 128 * 1024 // 128 KB/s

	// peerConnWait is the time that must have elapsed since the pool routine
	// was created before we start making requests. This gives the peer
	// routine time to connect to peers.
	peerConnWait = 3 * time.Second

	// If we're within minBlocksForSingleRequest blocks of the pool's height, we
	// send 2 parallel requests to 2 peers for the same block. If we're further
	// away, we send a single request.
	minBlocksForSingleRequest = 50
)

var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
Peers self report their heights when they join the block pool.
Starting from our latest pool.height, we request blocks
in sequence from peers that reported higher heights than ours.
Every so often we ask peers what height they're on so we can keep going.

Requests are continuously made for blocks of higher heights until
the limit is reached. If most of the requests have no available peers, and we
are not at peer limits, we can probably switch to the consensus reactor.
*/

// BlockPool keeps track of the block sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	startTime   time.Time
	startHeight int64

	mtx cmtsync.Mutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	sortedPeers   []*bpPeer // sorted by curRate, highest first
	maxPeerHeight int64     // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}
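// Illustrative sketch (not part of the original file): the pool is a
// producer on requestsCh and errorsCh; some owner (in CometBFT, the
// blocksync Reactor) is expected to drain both. sendToPeer and stopPeer
// are placeholders, not this package's API.
//
//	for {
//		select {
//		case request := <-requestsCh:
//			sendToPeer(request.PeerID, request.Height) // ask this peer for the block
//		case err := <-errorsCh:
//			stopPeer(err.peerID, err.err) // disconnect a misbehaving or slow peer
//		}
//	}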
// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[p2p.ID]*bpPeer),

		requesters:  make(map[int64]*bpRequester),
		height:      start,
		startHeight: start,
		numPending:  0,

		requestsCh: requestsCh,
		errorsCh:   errorsCh,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}

// OnStart implements service.Service by spawning the requesters routine and
// recording the pool's start time.
func (pool *BlockPool) OnStart() error {
	pool.startTime = time.Now()
	go pool.makeRequestersRoutine()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			return
		}

		// Check if we are within peerConnWait of the start time. This gives us
		// some time to connect to peers before starting a wave of requests.
		if time.Since(pool.startTime) < peerConnWait {
			// Sleep until peerConnWait has passed since pool.startTime.
			sleepDuration := peerConnWait - time.Since(pool.startTime)
			time.Sleep(sleepDuration)
		}

		pool.mtx.Lock()
		var (
			maxRequestersCreated = len(pool.requesters) >= len(pool.peers)*maxPendingRequestsPerPeer

			nextHeight           = pool.height + int64(len(pool.requesters))
			maxPeerHeightReached = nextHeight > pool.maxPeerHeight
		)
		pool.mtx.Unlock()

		switch {
		case maxRequestersCreated: // If we have enough requesters, wait for them to finish.
			time.Sleep(requestIntervalMS * time.Millisecond)
			pool.removeTimedoutPeers()
		case maxPeerHeightReached: // If we're caught up, wait a bit so the reactor can finish, or until a higher height is reported.
			time.Sleep(requestIntervalMS * time.Millisecond)
		default:
			// Request more blocks.
			pool.makeNextRequester(nextHeight)
			// Sleep for a bit to make the requests more ordered.
			time.Sleep(requestIntervalMS * time.Millisecond)
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}

			peer.curRate = curRate
		}

		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}

	pool.sortPeers()
}

// GetStatus returns the pool's height, the number of pending requests and the
// number of requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}
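// Illustrative sketch (not part of the original file): constructing and
// starting a pool, then polling GetStatus for progress. Channel capacities,
// logger wiring and the polling interval are arbitrary choices here.
//
//	requestsCh := make(chan BlockRequest, 1000)
//	errorsCh := make(chan peerError, 1000)
//	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
//	pool.SetLogger(logger.With("module", "blocksync"))
//	if err := pool.Start(); err != nil {
//		return err
//	}
//	for range time.Tick(10 * time.Second) {
//		height, pending, requesters := pool.GetStatus()
//		logger.Info("sync status", "height", height,
//			"pending", pending, "requesters", requesters)
//	}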
// IsCaughtUp returns true if this node is caught up, false otherwise.
// TODO: relax conditions, prevent abuse.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		pool.Logger.Debug("Blockpool has no peers")
		return false
	}

	// Some conditions to determine if we're caught up.
	// Ensures we've either received a block or waited some amount of time,
	// and that we're synced to the highest known height.
	// Note we use maxPeerHeight - 1 because syncing block H requires block H+1
	// to verify the LastCommit.
	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
	return isCaughtUp
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1. We need to
// see the second block's Commit to validate the first block. So we peek two
// blocks at a time. We return an extended commit, containing vote extensions
// and their associated signatures, as this is critical to consensus in ABCI++
// as we switch from block sync to consensus mode.
//
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first, second *types.Block, firstExtCommit *types.ExtendedCommit) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
		firstExtCommit = r.getExtendedCommit()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}

// PopRequest removes the requester at pool.height and increments pool.height.
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	r := pool.requesters[pool.height]
	if r == nil {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}

	if err := r.Stop(); err != nil {
		pool.Logger.Error("Error stopping requester", "err", err)
	}
	delete(pool.requesters, pool.height)
	pool.height++

	// Notify the next minBlocksForSingleRequest requesters about the new height, so
	// they can potentially request a block from a second peer.
	for i := int64(0); i < minBlocksForSingleRequest && i < int64(len(pool.requesters)); i++ {
		pool.requesters[pool.height+i].newHeight(pool.height)
	}
}

// RemovePeerAndRedoAllPeerRequests retries the request at the given height and
// all the requests made to the same peer. The peer is removed from the pool.
// Returns the ID of the removed peer.
func (pool *BlockPool) RemovePeerAndRedoAllPeerRequests(height int64) p2p.ID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.gotBlockFromPeerID()
	// RemovePeer will redo all requesters associated with this peer.
	pool.removePeer(peerID)
	return peerID
}

// RedoRequestFrom retries the request at the given height. It does not remove
// the peer.
func (pool *BlockPool) RedoRequestFrom(height int64, peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if requester, ok := pool.requesters[height]; ok { // If we requested this block
		if requester.didRequestFrom(peerID) { // From this specific peer
			requester.redo(peerID)
		}
	}
}
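// Illustrative sketch (assumed reactor-side loop, not part of the original
// file): blocks are consumed two at a time because the second block carries
// the commit that validates the first. verifyBlock is a placeholder.
//
//	first, second, extCommit := pool.PeekTwoBlocks()
//	if first == nil || second == nil {
//		return // not enough buffered blocks yet; try again later
//	}
//	if err := verifyBlock(first, second.LastCommit, extCommit); err != nil {
//		// drop the sender and re-request this height from other peers
//		pool.RemovePeerAndRedoAllPeerRequests(first.Height)
//		return
//	}
//	pool.PopRequest() // advances pool.height past `first`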
// Deprecated: use RemovePeerAndRedoAllPeerRequests instead.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
	return pool.RemovePeerAndRedoAllPeerRequests(height)
}

// AddBlock validates that the block comes from the peer it was expected from
// and calls the requester to store it.
//
// This requires an extended commit at the same height as the supplied block -
// the block contains the last commit, but we need the latest commit in case we
// need to switch over from block sync to consensus at this height. If the
// height of the extended commit and the height of the block do not match, we
// do not add the block and return an error.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, extCommit *types.ExtendedCommit, blockSize int) error {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if extCommit != nil && block.Height != extCommit.Height {
		err := fmt.Errorf("block height %d != extCommit height %d", block.Height, extCommit.Height)
		// Peer sent us an invalid block => remove it.
		pool.sendError(err, peerID)
		return err
	}

	requester := pool.requesters[block.Height]
	if requester == nil {
		// Because we're issuing second requests for closer blocks, it's possible to
		// receive a block we've already processed from a second peer. Hence, we
		// can't punish it. But if the peer sent us a block we clearly didn't
		// request, we disconnect.
		if block.Height > pool.height || block.Height < pool.startHeight {
			err := fmt.Errorf("peer sent us block #%d we didn't expect (current height: %d, start height: %d)",
				block.Height, pool.height, pool.startHeight)
			pool.sendError(err, peerID)
			return err
		}

		return fmt.Errorf("got an already committed block #%d (possibly from the slow peer %s)", block.Height, peerID)
	}

	if !requester.setBlock(block, extCommit, peerID) {
		err := fmt.Errorf("requested block #%d from %v, not %s", block.Height, requester.requestedFrom(), peerID)
		pool.sendError(err, peerID)
		return err
	}

	atomic.AddInt32(&pool.numPending, -1)
	peer := pool.peers[peerID]
	if peer != nil {
		peer.decrPending(blockSize)
	}

	return nil
}

// Height returns the pool's height.
func (pool *BlockPool) Height() int64 {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()
	return pool.height
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()
	return pool.maxPeerHeight
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
		// No need to sort because curRate is 0 at start.
		// Just add to the beginning so it's picked first by pickIncrAvailablePeer.
		pool.sortedPeers = append([]*bpPeer{peer}, pool.sortedPeers...)
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}
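// Illustrative sketch (assumed reactor-side code, not part of this file):
// SetPeerRange is typically fed from a peer's status response; the msg and
// src identifiers below are assumptions.
//
//	case *bcproto.StatusResponse: // peer reported its base and height
//		pool.SetPeerRange(src.ID(), msg.Base, msg.Height)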
// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, the function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
	for _, requester := range pool.requesters {
		if requester.didRequestFrom(peerID) {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)
		for i, p := range pool.sortedPeers {
			if p.id == peerID {
				pool.sortedPeers = append(pool.sortedPeers[:i], pool.sortedPeers[i+1:]...)
				break
			}
		}

		// Find a new peer with the biggest height and update maxPeerHeight if
		// the removed peer's height was the biggest.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Picks an available peer whose range covers the given height.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64, excludePeerID p2p.ID) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.sortedPeers {
		if peer.id == excludePeerID {
			continue
		}
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}

	return nil
}
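// Illustrative sketch (not part of the original file): because sortedPeers
// is ordered by curRate descending, pickIncrAvailablePeer returns the
// fastest eligible peer with spare request capacity. The rates are made up.
//
//	// peers A (300 KB/s), B (200 KB/s), C (100 KB/s) all cover height h:
//	p1 := pool.pickIncrAvailablePeer(h, "")    // A, and A.numPending++
//	p2 := pool.pickIncrAvailablePeer(h, p1.id) // B, since A is excluded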
// Sort peers by curRate, highest first.
//
// CONTRACT: pool.mtx must be locked.
func (pool *BlockPool) sortPeers() {
	sort.Slice(pool.sortedPeers, func(i, j int) bool {
		return pool.sortedPeers[i].curRate > pool.sortedPeers[j].curRate
	})
}

func (pool *BlockPool) makeNextRequester(nextHeight int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	if err := request.Start(); err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + int64(len(pool.requesters))
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
			str += fmt.Sprintf("C?(%v) ", pool.requesters[h].extCommit != nil)
		}
	}
	return str
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	curRate     int64
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          p2p.ID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}

func (peer *bpPeer) resetMonitor() {
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		peer.timeout.Stop()
	} else {
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}
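// Illustrative sketch (not part of the original file): how the flowrate
// monitor and the timeout timer interact over a request/response cycle.
// The block size below is an arbitrary example.
//
//	peer.incrPending()           // first pending request: arm monitor and timer
//	peer.incrPending()           // second request: counter only
//	peer.decrPending(512 * 1024) // a 512 KiB block arrived; monitor updated and
//	                             // timer re-armed because one request is still pending
//	rate := peer.recvMonitor.Status().CurRate // bytes/s, checked against minRecvRate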
Note 597 // that it's okay to send a single request when the requested height is far 598 // from the pool's height. If the peer is slow, it will timeout and be replaced 599 // with another peer. 600 type bpRequester struct { 601 service.BaseService 602 603 pool *BlockPool 604 height int64 605 gotBlockCh chan struct{} 606 redoCh chan p2p.ID // redo may got multiple messages, add peerId to identify repeat 607 newHeightCh chan int64 608 609 mtx cmtsync.Mutex 610 peerID p2p.ID 611 secondPeerID p2p.ID // alternative peer to request from (if close to pool's height) 612 gotBlockFrom p2p.ID 613 block *types.Block 614 extCommit *types.ExtendedCommit 615 } 616 617 func newBPRequester(pool *BlockPool, height int64) *bpRequester { 618 bpr := &bpRequester{ 619 pool: pool, 620 height: height, 621 gotBlockCh: make(chan struct{}, 1), 622 redoCh: make(chan p2p.ID, 1), 623 newHeightCh: make(chan int64, 1), 624 625 peerID: "", 626 secondPeerID: "", 627 block: nil, 628 } 629 bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr) 630 return bpr 631 } 632 633 func (bpr *bpRequester) OnStart() error { 634 go bpr.requestRoutine() 635 return nil 636 } 637 638 // Returns true if the peer(s) match and block doesn't already exist. 639 func (bpr *bpRequester) setBlock(block *types.Block, extCommit *types.ExtendedCommit, peerID p2p.ID) bool { 640 bpr.mtx.Lock() 641 if bpr.peerID != peerID && bpr.secondPeerID != peerID { 642 bpr.mtx.Unlock() 643 return false 644 } 645 if bpr.block != nil { 646 bpr.mtx.Unlock() 647 return true // getting a block from both peers is not an error 648 } 649 650 bpr.block = block 651 bpr.extCommit = extCommit 652 bpr.gotBlockFrom = peerID 653 bpr.mtx.Unlock() 654 655 select { 656 case bpr.gotBlockCh <- struct{}{}: 657 default: 658 } 659 return true 660 } 661 662 func (bpr *bpRequester) getBlock() *types.Block { 663 bpr.mtx.Lock() 664 defer bpr.mtx.Unlock() 665 return bpr.block 666 } 667 668 func (bpr *bpRequester) getExtendedCommit() *types.ExtendedCommit { 669 bpr.mtx.Lock() 670 defer bpr.mtx.Unlock() 671 return bpr.extCommit 672 } 673 674 // Returns the IDs of peers we've requested a block from. 675 func (bpr *bpRequester) requestedFrom() []p2p.ID { 676 bpr.mtx.Lock() 677 defer bpr.mtx.Unlock() 678 peerIDs := make([]p2p.ID, 0, 2) 679 if bpr.peerID != "" { 680 peerIDs = append(peerIDs, bpr.peerID) 681 } 682 if bpr.secondPeerID != "" { 683 peerIDs = append(peerIDs, bpr.secondPeerID) 684 } 685 return peerIDs 686 } 687 688 // Returns true if we've requested a block from the given peer. 689 func (bpr *bpRequester) didRequestFrom(peerID p2p.ID) bool { 690 bpr.mtx.Lock() 691 defer bpr.mtx.Unlock() 692 return bpr.peerID == peerID || bpr.secondPeerID == peerID 693 } 694 695 // Returns the ID of the peer who sent us the block. 696 func (bpr *bpRequester) gotBlockFromPeerID() p2p.ID { 697 bpr.mtx.Lock() 698 defer bpr.mtx.Unlock() 699 return bpr.gotBlockFrom 700 } 701 702 // Removes the block (IF we got it from the given peer) and resets the peer. 703 func (bpr *bpRequester) reset(peerID p2p.ID) (removedBlock bool) { 704 bpr.mtx.Lock() 705 defer bpr.mtx.Unlock() 706 707 // Only remove the block if we got it from that peer. 
// Removes the block (IF we got it from the given peer) and resets the peer.
func (bpr *bpRequester) reset(peerID p2p.ID) (removedBlock bool) {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	// Only remove the block if we got it from that peer.
	if bpr.gotBlockFrom == peerID {
		bpr.block = nil
		bpr.extCommit = nil
		bpr.gotBlockFrom = ""
		removedBlock = true
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	if bpr.peerID == peerID {
		bpr.peerID = ""
	} else {
		bpr.secondPeerID = ""
	}

	return removedBlock
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}

func (bpr *bpRequester) pickPeerAndSendRequest() {
	bpr.mtx.Lock()
	secondPeerID := bpr.secondPeerID
	bpr.mtx.Unlock()

	var peer *bpPeer
PICK_PEER_LOOP:
	for {
		if !bpr.IsRunning() || !bpr.pool.IsRunning() {
			return
		}
		peer = bpr.pool.pickIncrAvailablePeer(bpr.height, secondPeerID)
		if peer == nil {
			bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
			time.Sleep(requestIntervalMS * time.Millisecond)
			continue PICK_PEER_LOOP
		}
		break PICK_PEER_LOOP
	}
	bpr.mtx.Lock()
	bpr.peerID = peer.id
	bpr.mtx.Unlock()

	bpr.pool.sendRequest(bpr.height, peer.id)
}

// Picks a second peer and sends a request to it. If the second peer is already
// set, does nothing.
func (bpr *bpRequester) pickSecondPeerAndSendRequest() (picked bool) {
	bpr.mtx.Lock()
	if bpr.secondPeerID != "" {
		bpr.mtx.Unlock()
		return false
	}
	peerID := bpr.peerID
	bpr.mtx.Unlock()

	secondPeer := bpr.pool.pickIncrAvailablePeer(bpr.height, peerID)
	if secondPeer != nil {
		bpr.mtx.Lock()
		bpr.secondPeerID = secondPeer.id
		bpr.mtx.Unlock()

		bpr.pool.sendRequest(bpr.height, secondPeer.id)
		return true
	}

	return false
}

// Informs the requester of the pool's new height.
func (bpr *bpRequester) newHeight(height int64) {
	select {
	case bpr.newHeightCh <- height:
	default:
	}
}
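// Illustrative sketch (not part of the original file): redo, newHeight and
// the gotBlockCh send in setBlock all rely on the same nonblocking-notify
// idiom. With a 1-buffered channel, repeated signals coalesce instead of
// blocking the caller (PopRequest calls newHeight while holding pool.mtx).
//
//	select {
//	case ch <- v: // buffer free: signal recorded
//	default:      // a signal is already pending: drop this one
//	}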
// Responsible for making more requests as necessary. It stops requesting once
// a block is received (via AddBlock) and returns when the requester or the
// pool is stopped.
func (bpr *bpRequester) requestRoutine() {
	gotBlock := false

OUTER_LOOP:
	for {
		bpr.pickPeerAndSendRequest()

		poolHeight := bpr.pool.Height()
		if bpr.height-poolHeight < minBlocksForSingleRequest {
			bpr.pickSecondPeerAndSendRequest()
		}

		retryTimer := time.NewTimer(requestRetrySeconds * time.Second)
		defer retryTimer.Stop()

		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case <-retryTimer.C:
				if !gotBlock {
					bpr.Logger.Debug("Retrying block request(s) after timeout", "height", bpr.height, "peer", bpr.peerID, "secondPeerID", bpr.secondPeerID)
					bpr.reset(bpr.peerID)
					bpr.reset(bpr.secondPeerID)
					continue OUTER_LOOP
				}
			case peerID := <-bpr.redoCh:
				if bpr.didRequestFrom(peerID) {
					removedBlock := bpr.reset(peerID)
					if removedBlock {
						gotBlock = false
					}
				}
				// If both peers returned NoBlockResponse or a bad block, reschedule both
				// requests. If not, wait for the other peer.
				if len(bpr.requestedFrom()) == 0 {
					retryTimer.Stop()
					continue OUTER_LOOP
				}
			case newHeight := <-bpr.newHeightCh:
				if !gotBlock && bpr.height-newHeight < minBlocksForSingleRequest {
					// The operation is a noop if the second peer is already set. The cost is checking a mutex.
					//
					// If the second peer was just set, reset the retryTimer to give the
					// second peer a chance to respond.
					if picked := bpr.pickSecondPeerAndSendRequest(); picked {
						if !retryTimer.Stop() {
							<-retryTimer.C
						}
						retryTimer.Reset(requestRetrySeconds * time.Second)
					}
				}
			case <-bpr.gotBlockCh:
				gotBlock = true
				// We got a block!
				// Continue the for-loop and wait until Quit.
			}
		}
	}
}

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}
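// Illustrative sketch (test-style, not part of the original file): since
// peerTimeout is a package variable rather than a const, a test can shorten
// it to exercise the timeout path quickly.
//
//	func TestPeerTimeoutFires(t *testing.T) {
//		saved := peerTimeout
//		peerTimeout = 50 * time.Millisecond
//		defer func() { peerTimeout = saved }()
//		// ... start a pool with a silent peer and expect an error on errorsCh
//	}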