github.com/KYVENetwork/cometbft/v38@v38.0.3/blocksync/pool.go

     1  package blocksync
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"sort"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	flow "github.com/KYVENetwork/cometbft/v38/libs/flowrate"
    12  	"github.com/KYVENetwork/cometbft/v38/libs/log"
    13  	"github.com/KYVENetwork/cometbft/v38/libs/service"
    14  	cmtsync "github.com/KYVENetwork/cometbft/v38/libs/sync"
    15  	"github.com/KYVENetwork/cometbft/v38/p2p"
    16  	"github.com/KYVENetwork/cometbft/v38/types"
    17  )
    18  
    19  /*
    20  e.g., L = latency = 0.1s
    21  	P = num peers = 10
    22  	FN = num full nodes
    23  	BS = 1 kB block size
    24  	CB = 1 Mbit/s = 128 kB/s
    25  	CB/P = 12.8 kB/s per peer
    26  	B/S = CB/P/BS = 12.8 blocks/s per peer
    27  
    28  	12.8 blocks/s * 0.1 s = 1.28 blocks in flight per connection
    29  */
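        // The figures above can be reproduced with the following back-of-the-envelope
        // arithmetic (an illustrative sketch only; nothing below is used by this
        // package):
        //
        //	latency := 0.1                          // L, seconds
        //	numPeers := 10.0                        // P
        //	blockSize := 1.0                        // BS, kB
        //	connBandwidth := 128.0                  // CB, kB/s (1 Mbit/s)
        //	perPeerRate := connBandwidth / numPeers // 12.8 kB/s
        //	blocksPerSec := perPeerRate / blockSize // 12.8 blocks/s
        //	inFlight := blocksPerSec * latency      // ~1.28 blocks on the connection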
    30  
    31  const (
    32  	requestIntervalMS         = 2
    33  	maxPendingRequestsPerPeer = 20
    34  	requestRetrySeconds       = 30
    35  
    36  	// Minimum recv rate to ensure we're receiving blocks from a peer fast
    37  	// enough. If a peer is not sending us data at least at that rate, we
    38  	// consider them to have timed out and we disconnect.
    39  	//
    40  	// Based on the experiments with [Osmosis](https://osmosis.zone/), the
    41  	// minimum rate could be as high as 500 KB/s. However, we're setting it to
    42  	// 128 KB/s for now to be conservative.
    43  	minRecvRate = 128 * 1024 // 128 KB/s
    44  
    45  	// peerConnWait is the time that must have elapsed since the pool routine
    46  	// was created before we start making requests. This is to give the peer
    47  	// routine time to connect to peers.
    48  	peerConnWait = 3 * time.Second
    49  
    50  	// If we're within minBlocksForSingleRequest blocks of the pool's height, we
    51  	// send 2 parallel requests to 2 peers for the same block. If we're further
    52  	// away, we send a single request (see the sketch after this const block).
    53  	minBlocksForSingleRequest = 50
    54  )
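        // For example, with the defaults above and the pool at height 100, this is
        // roughly the condition bpRequester evaluates (a sketch, not extra logic):
        //
        //	poolHeight := int64(100)
        //	requesterHeight := int64(120)
        //	if requesterHeight-poolHeight < minBlocksForSingleRequest { // 20 < 50
        //		// close to the pool's height: two peers are asked for the same block
        //	} else {
        //		// far from the pool's height: a single peer is asked
        //	}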
    55  
    56  var peerTimeout = 15 * time.Second // not const so we can override it in tests
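        // Because peerTimeout is a variable, a test can shorten it, e.g. (a sketch of
        // a hypothetical test, not code in this package):
        //
        //	oldTimeout := peerTimeout
        //	peerTimeout = 2 * time.Second
        //	defer func() { peerTimeout = oldTimeout }()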
    57  
    58  /*
    59  	Peers self-report their heights when we join the block pool.
    60  	Starting from our latest pool.height, we request blocks
    61  	in sequence from peers that reported higher heights than ours.
    62  	Every so often we ask peers what height they're on so we can keep going.
    63  
    64  	Requests are continuously made for blocks of higher heights until
    65  	the limit is reached. If most of the requests have no available peers, and we
    66  	are not at the peer limit, we can probably switch to the consensus reactor.
    67  */
    68  
    69  // BlockPool keeps track of the block sync peers, block requests and block responses.
    70  type BlockPool struct {
    71  	service.BaseService
    72  	startTime   time.Time
    73  	startHeight int64
    74  
    75  	mtx cmtsync.Mutex
    76  	// block requests
    77  	requesters map[int64]*bpRequester
    78  	height     int64 // the lowest key in requesters.
    79  	// peers
    80  	peers         map[p2p.ID]*bpPeer
    81  	sortedPeers   []*bpPeer // sorted by curRate, highest first
    82  	maxPeerHeight int64     // the biggest reported height
    83  
    84  	// atomic
    85  	numPending int32 // number of requests pending assignment or block response
    86  
    87  	requestsCh chan<- BlockRequest
    88  	errorsCh   chan<- peerError
    89  }
    90  
    91  // NewBlockPool returns a new BlockPool with the height equal to start. Block
    92  // requests and errors will be sent to requestsCh and errorsCh accordingly.
    93  func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    94  	bp := &BlockPool{
    95  		peers: make(map[p2p.ID]*bpPeer),
    96  
    97  		requesters:  make(map[int64]*bpRequester),
    98  		height:      start,
    99  		startHeight: start,
   100  		numPending:  0,
   101  
   102  		requestsCh: requestsCh,
   103  		errorsCh:   errorsCh,
   104  	}
   105  	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
   106  	return bp
   107  }
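        // A minimal sketch of how a caller (e.g. the blocksync reactor) might wire up
        // and drive the pool. startHeight, peerID, peerBase and peerHeight are
        // placeholders for values obtained elsewhere, not identifiers from this file:
        //
        //	requestsCh := make(chan BlockRequest, 1000)
        //	errorsCh := make(chan peerError, 1000)
        //	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
        //	if err := pool.Start(); err != nil {
        //		panic(err)
        //	}
        //	// Record each peer's reported range as status messages arrive.
        //	pool.SetPeerRange(peerID, peerBase, peerHeight)
        //	// Drain the channels: forward requests to peers, report misbehaving peers.
        //	go func() {
        //		for {
        //			select {
        //			case req := <-requestsCh:
        //				_ = req // send a block request for req.Height to req.PeerID
        //			case perr := <-errorsCh:
        //				_ = perr // disconnect the offending peer
        //			}
        //		}
        //	}()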
   108  
   109  // OnStart implements service.Service by spawning the requesters routine and
   110  // recording the pool's start time.
   111  func (pool *BlockPool) OnStart() error {
   112  	pool.startTime = time.Now()
   113  	go pool.makeRequestersRoutine()
   114  	return nil
   115  }
   116  
   117  // spawns requesters as needed
   118  func (pool *BlockPool) makeRequestersRoutine() {
   119  	for {
   120  		if !pool.IsRunning() {
   121  			return
   122  		}
   123  
   124  		// Check if we are within peerConnWait of the start time.
   125  		// This gives us some time to connect to peers before starting a wave of requests.
   126  		if time.Since(pool.startTime) < peerConnWait {
   127  			// Sleep until peerConnWait has passed since pool.startTime.
   128  			sleepDuration := peerConnWait - time.Since(pool.startTime)
   129  			time.Sleep(sleepDuration)
   130  		}
   131  
   132  		pool.mtx.Lock()
   133  		var (
   134  			maxRequestersCreated = len(pool.requesters) >= len(pool.peers)*maxPendingRequestsPerPeer
   135  
   136  			nextHeight           = pool.height + int64(len(pool.requesters))
   137  			maxPeerHeightReached = nextHeight > pool.maxPeerHeight
   138  		)
   139  		pool.mtx.Unlock()
   140  
   141  		switch {
   142  		case maxRequestersCreated: // If we have enough requesters, wait for them to finish.
   143  			time.Sleep(requestIntervalMS * time.Millisecond)
   144  			pool.removeTimedoutPeers()
   145  		case maxPeerHeightReached: // If we're caught up, wait a bit so the reactor can finish, or until a higher height is reported.
   146  			time.Sleep(requestIntervalMS * time.Millisecond)
   147  		default:
   148  			// Request more blocks.
   149  			pool.makeNextRequester(nextHeight)
   150  			// Sleep for a bit to make the requests more ordered.
   151  			time.Sleep(requestIntervalMS * time.Millisecond)
   152  		}
   153  	}
   154  }
   155  
   156  func (pool *BlockPool) removeTimedoutPeers() {
   157  	pool.mtx.Lock()
   158  	defer pool.mtx.Unlock()
   159  
   160  	for _, peer := range pool.peers {
   161  		if !peer.didTimeout && peer.numPending > 0 {
   162  			curRate := peer.recvMonitor.Status().CurRate
   163  			// curRate can be 0 on start
   164  			if curRate != 0 && curRate < minRecvRate {
   165  				err := errors.New("peer is not sending us data fast enough")
   166  				pool.sendError(err, peer.id)
   167  				pool.Logger.Error("SendTimeout", "peer", peer.id,
   168  					"reason", err,
   169  					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
   170  					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
   171  				peer.didTimeout = true
   172  			}
   173  
   174  			peer.curRate = curRate
   175  		}
   176  
   177  		if peer.didTimeout {
   178  			pool.removePeer(peer.id)
   179  		}
   180  	}
   181  
   182  	pool.sortPeers()
   183  }
   184  
   185  // GetStatus returns pool's height, numPending requests and the number of
   186  // requesters.
   187  func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
   188  	pool.mtx.Lock()
   189  	defer pool.mtx.Unlock()
   190  
   191  	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
   192  }
   193  
   194  // IsCaughtUp returns true if this node is caught up, false otherwise.
   195  // TODO: relax conditions, prevent abuse.
   196  func (pool *BlockPool) IsCaughtUp() bool {
   197  	pool.mtx.Lock()
   198  	defer pool.mtx.Unlock()
   199  
   200  	// Need at least 1 peer to be considered caught up.
   201  	if len(pool.peers) == 0 {
   202  		pool.Logger.Debug("Blockpool has no peers")
   203  		return false
   204  	}
   205  
   206  	// Some conditions to determine if we're caught up.
   207  	// Ensures we've either received a block or waited some amount of time,
   208  	// and that we're synced to the highest known height.
   209  	// Note we use maxPeerHeight - 1 because syncing block H requires block H+1
   210  	// to verify the LastCommit.
   211  	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
   212  	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
   213  	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
   214  	return isCaughtUp
   215  }
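        // A sketch of how a caller might poll this to decide when to hand off to
        // consensus; the one-second interval and switchToConsensus are placeholders,
        // not part of this package:
        //
        //	ticker := time.NewTicker(time.Second)
        //	defer ticker.Stop()
        //	for range ticker.C {
        //		if pool.IsCaughtUp() {
        //			switchToConsensus()
        //			break
        //		}
        //	}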
   216  
   217  // PeekTwoBlocks returns blocks at pool.height and pool.height+1. We need to
   218  // see the second block's Commit to validate the first block. So we peek two
   219  // blocks at a time. We return an extended commit, containing vote extensions
   220  // and their associated signatures, as this is critical to consensus in ABCI++
   221  // as we switch from block sync to consensus mode.
   222  //
   223  // The caller will verify the commit.
   224  func (pool *BlockPool) PeekTwoBlocks() (first, second *types.Block, firstExtCommit *types.ExtendedCommit) {
   225  	pool.mtx.Lock()
   226  	defer pool.mtx.Unlock()
   227  
   228  	if r := pool.requesters[pool.height]; r != nil {
   229  		first = r.getBlock()
   230  		firstExtCommit = r.getExtendedCommit()
   231  	}
   232  	if r := pool.requesters[pool.height+1]; r != nil {
   233  		second = r.getBlock()
   234  	}
   235  	return
   236  }
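        // An illustrative consumption loop (a sketch only; verifyAndApply stands in
        // for whatever verification and state application the caller performs and is
        // not defined in this package):
        //
        //	for {
        //		first, second, extCommit := pool.PeekTwoBlocks()
        //		if first == nil || second == nil {
        //			time.Sleep(10 * time.Millisecond)
        //			continue // wait until both blocks are available
        //		}
        //		// second.LastCommit signs first, so first can be verified now.
        //		if err := verifyAndApply(first, second, extCommit); err != nil {
        //			// Evict the peer and re-request the block from another one.
        //			pool.RemovePeerAndRedoAllPeerRequests(first.Height)
        //			continue
        //		}
        //		pool.PopRequest()
        //	}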
   237  
   238  // PopRequest removes the requester at pool.height and increments pool.height.
   239  func (pool *BlockPool) PopRequest() {
   240  	pool.mtx.Lock()
   241  	defer pool.mtx.Unlock()
   242  
   243  	r := pool.requesters[pool.height]
   244  	if r == nil {
   245  		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
   246  	}
   247  
   248  	if err := r.Stop(); err != nil {
   249  		pool.Logger.Error("Error stopping requester", "err", err)
   250  	}
   251  	delete(pool.requesters, pool.height)
   252  	pool.height++
   253  
   254  	// Notify the next minBlocksForSingleRequest requesters about the new height,
   255  	// so they can potentially request the block from a second peer.
   256  	for i := int64(0); i < minBlocksForSingleRequest && i < int64(len(pool.requesters)); i++ {
   257  		pool.requesters[pool.height+i].newHeight(pool.height)
   258  	}
   259  }
   260  
   261  // RemovePeerAndRedoAllPeerRequests retries the request at the given height and
   262  // all the requests made to the same peer. The peer is removed from the pool.
   263  // Returns the ID of the removed peer.
   264  func (pool *BlockPool) RemovePeerAndRedoAllPeerRequests(height int64) p2p.ID {
   265  	pool.mtx.Lock()
   266  	defer pool.mtx.Unlock()
   267  
   268  	request := pool.requesters[height]
   269  	peerID := request.gotBlockFromPeerID()
   270  	// RemovePeer will redo all requesters associated with this peer.
   271  	pool.removePeer(peerID)
   272  	return peerID
   273  }
   274  
   275  // RedoRequestFrom retries the request at the given height. It does not remove the
   276  // peer.
   277  func (pool *BlockPool) RedoRequestFrom(height int64, peerID p2p.ID) {
   278  	pool.mtx.Lock()
   279  	defer pool.mtx.Unlock()
   280  
   281  	if requester, ok := pool.requesters[height]; ok { // If we requested this block
   282  		if requester.didRequestFrom(peerID) { // From this specific peer
   283  			requester.redo(peerID)
   284  		}
   285  	}
   286  }
   287  
   288  // Deprecated: use RemovePeerAndRedoAllPeerRequests instead.
   289  func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
   290  	return pool.RemovePeerAndRedoAllPeerRequests(height)
   291  }
   292  
   293  // AddBlock validates that the block comes from the peer it was expected from
   294  // and calls the requester to store it.
   295  //
   296  // This requires an extended commit at the same height as the supplied block -
   297  // the block contains the last commit, but we need the latest commit in case we
   298  // need to switch over from block sync to consensus at this height. If the
   299  // height of the extended commit and the height of the block do not match, we
   300  // do not add the block and return an error.
   301  // TODO: ensure that blocks come in order for each peer.
   302  func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, extCommit *types.ExtendedCommit, blockSize int) error {
   303  	pool.mtx.Lock()
   304  	defer pool.mtx.Unlock()
   305  
   306  	if extCommit != nil && block.Height != extCommit.Height {
   307  		err := fmt.Errorf("block height %d != extCommit height %d", block.Height, extCommit.Height)
   308  		// Peer sent us an invalid block => remove it.
   309  		pool.sendError(err, peerID)
   310  		return err
   311  	}
   312  
   313  	requester := pool.requesters[block.Height]
   314  	if requester == nil {
   315  		// Because we issue second requests for blocks close to the pool's height,
   316  		// it's possible to receive a block we've already processed from a second
   317  		// peer. Hence, we can't punish the peer for it. But if the peer sent us a
   318  		// block we clearly didn't request, we disconnect.
   319  		if block.Height > pool.height || block.Height < pool.startHeight {
   320  			err := fmt.Errorf("peer sent us block #%d we didn't expect (current height: %d, start height: %d)",
   321  				block.Height, pool.height, pool.startHeight)
   322  			pool.sendError(err, peerID)
   323  			return err
   324  		}
   325  
   326  		return fmt.Errorf("got an already committed block #%d (possibly from the slow peer %s)", block.Height, peerID)
   327  	}
   328  
   329  	if !requester.setBlock(block, extCommit, peerID) {
   330  		err := fmt.Errorf("requested block #%d from %v, not %s", block.Height, requester.requestedFrom(), peerID)
   331  		pool.sendError(err, peerID)
   332  		return err
   333  	}
   334  
   335  	atomic.AddInt32(&pool.numPending, -1)
   336  	peer := pool.peers[peerID]
   337  	if peer != nil {
   338  		peer.decrPending(blockSize)
   339  	}
   340  
   341  	return nil
   342  }
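        // A sketch of how a reactor might feed an incoming block message into the
        // pool. msg, src, blockSize and logger are placeholders for the caller's
        // message, peer, message size and logger, not identifiers from this file:
        //
        //	if err := pool.AddBlock(src.ID(), msg.Block, msg.ExtCommit, blockSize); err != nil {
        //		logger.Error("failed to add block", "height", msg.Block.Height, "err", err)
        //	}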
   343  
   344  // Height returns the pool's height.
   345  func (pool *BlockPool) Height() int64 {
   346  	pool.mtx.Lock()
   347  	defer pool.mtx.Unlock()
   348  	return pool.height
   349  }
   350  
   351  // MaxPeerHeight returns the highest reported height.
   352  func (pool *BlockPool) MaxPeerHeight() int64 {
   353  	pool.mtx.Lock()
   354  	defer pool.mtx.Unlock()
   355  	return pool.maxPeerHeight
   356  }
   357  
   358  // SetPeerRange sets the peer's alleged blockchain base and height.
   359  func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
   360  	pool.mtx.Lock()
   361  	defer pool.mtx.Unlock()
   362  
   363  	peer := pool.peers[peerID]
   364  	if peer != nil {
   365  		peer.base = base
   366  		peer.height = height
   367  	} else {
   368  		peer = newBPPeer(pool, peerID, base, height)
   369  		peer.setLogger(pool.Logger.With("peer", peerID))
   370  		pool.peers[peerID] = peer
   371  		// no need to sort because curRate is 0 at start.
   372  		// just add to the beginning so it's picked first by pickIncrAvailablePeer.
   373  		pool.sortedPeers = append([]*bpPeer{peer}, pool.sortedPeers...)
   374  	}
   375  
   376  	if height > pool.maxPeerHeight {
   377  		pool.maxPeerHeight = height
   378  	}
   379  }
   380  
   381  // RemovePeer removes the peer with peerID from the pool. If there's no peer
   382  // with peerID, the function is a no-op.
   383  func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
   384  	pool.mtx.Lock()
   385  	defer pool.mtx.Unlock()
   386  
   387  	pool.removePeer(peerID)
   388  }
   389  
   390  func (pool *BlockPool) removePeer(peerID p2p.ID) {
   391  	for _, requester := range pool.requesters {
   392  		if requester.didRequestFrom(peerID) {
   393  			requester.redo(peerID)
   394  		}
   395  	}
   396  
   397  	peer, ok := pool.peers[peerID]
   398  	if ok {
   399  		if peer.timeout != nil {
   400  			peer.timeout.Stop()
   401  		}
   402  
   403  		delete(pool.peers, peerID)
   404  		for i, p := range pool.sortedPeers {
   405  			if p.id == peerID {
   406  				pool.sortedPeers = append(pool.sortedPeers[:i], pool.sortedPeers[i+1:]...)
   407  				break
   408  			}
   409  		}
   410  
   411  		// If the removed peer's height was the biggest, recompute maxPeerHeight
   412  		// from the remaining peers.
   413  		if peer.height == pool.maxPeerHeight {
   414  			pool.updateMaxPeerHeight()
   415  		}
   416  	}
   417  }
   418  
   419  // If no peers are left, maxPeerHeight is set to 0.
   420  func (pool *BlockPool) updateMaxPeerHeight() {
   421  	var max int64
   422  	for _, peer := range pool.peers {
   423  		if peer.height > max {
   424  			max = peer.height
   425  		}
   426  	}
   427  	pool.maxPeerHeight = max
   428  }
   429  
   430  // Picks an available peer that has the given height.
   431  // If no such peer is available, returns nil.
   432  func (pool *BlockPool) pickIncrAvailablePeer(height int64, excludePeerID p2p.ID) *bpPeer {
   433  	pool.mtx.Lock()
   434  	defer pool.mtx.Unlock()
   435  
   436  	for _, peer := range pool.sortedPeers {
   437  		if peer.id == excludePeerID {
   438  			continue
   439  		}
   440  		if peer.didTimeout {
   441  			pool.removePeer(peer.id)
   442  			continue
   443  		}
   444  		if peer.numPending >= maxPendingRequestsPerPeer {
   445  			continue
   446  		}
   447  		if height < peer.base || height > peer.height {
   448  			continue
   449  		}
   450  		peer.incrPending()
   451  		return peer
   452  	}
   453  
   454  	return nil
   455  }
   456  
   457  // Sort peers by curRate, highest first.
   458  //
   459  // CONTRACT: pool.mtx must be locked.
   460  func (pool *BlockPool) sortPeers() {
   461  	sort.Slice(pool.sortedPeers, func(i, j int) bool {
   462  		return pool.sortedPeers[i].curRate > pool.sortedPeers[j].curRate
   463  	})
   464  }
   465  
   466  func (pool *BlockPool) makeNextRequester(nextHeight int64) {
   467  	pool.mtx.Lock()
   468  	defer pool.mtx.Unlock()
   469  
   470  	request := newBPRequester(pool, nextHeight)
   471  
   472  	pool.requesters[nextHeight] = request
   473  	atomic.AddInt32(&pool.numPending, 1)
   474  
   475  	if err := request.Start(); err != nil {
   476  		request.Logger.Error("Error starting request", "err", err)
   477  	}
   478  }
   479  
   480  func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
   481  	if !pool.IsRunning() {
   482  		return
   483  	}
   484  	pool.requestsCh <- BlockRequest{height, peerID}
   485  }
   486  
   487  func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
   488  	if !pool.IsRunning() {
   489  		return
   490  	}
   491  	pool.errorsCh <- peerError{err, peerID}
   492  }
   493  
   494  // for debugging purposes
   495  //
   496  //nolint:unused
   497  func (pool *BlockPool) debug() string {
   498  	pool.mtx.Lock()
   499  	defer pool.mtx.Unlock()
   500  
   501  	str := ""
   502  	nextHeight := pool.height + int64(len(pool.requesters))
   503  	for h := pool.height; h < nextHeight; h++ {
   504  		if pool.requesters[h] == nil {
   505  			str += fmt.Sprintf("H(%v):X ", h)
   506  		} else {
   507  			str += fmt.Sprintf("H(%v):", h)
   508  			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
   509  			str += fmt.Sprintf("C?(%v) ", pool.requesters[h].extCommit != nil)
   510  		}
   511  	}
   512  	return str
   513  }
   514  
   515  //-------------------------------------
   516  
   517  type bpPeer struct {
   518  	didTimeout  bool
   519  	curRate     int64
   520  	numPending  int32
   521  	height      int64
   522  	base        int64
   523  	pool        *BlockPool
   524  	id          p2p.ID
   525  	recvMonitor *flow.Monitor
   526  
   527  	timeout *time.Timer
   528  
   529  	logger log.Logger
   530  }
   531  
   532  func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
   533  	peer := &bpPeer{
   534  		pool:       pool,
   535  		id:         peerID,
   536  		base:       base,
   537  		height:     height,
   538  		numPending: 0,
   539  		logger:     log.NewNopLogger(),
   540  	}
   541  	return peer
   542  }
   543  
   544  func (peer *bpPeer) setLogger(l log.Logger) {
   545  	peer.logger = l
   546  }
   547  
   548  func (peer *bpPeer) resetMonitor() {
   549  	peer.recvMonitor = flow.New(time.Second, time.Second*40)
   550  	initialValue := float64(minRecvRate) * math.E
   551  	peer.recvMonitor.SetREMA(initialValue)
   552  }
   553  
   554  func (peer *bpPeer) resetTimeout() {
   555  	if peer.timeout == nil {
   556  		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
   557  	} else {
   558  		peer.timeout.Reset(peerTimeout)
   559  	}
   560  }
   561  
   562  func (peer *bpPeer) incrPending() {
   563  	if peer.numPending == 0 {
   564  		peer.resetMonitor()
   565  		peer.resetTimeout()
   566  	}
   567  	peer.numPending++
   568  }
   569  
   570  func (peer *bpPeer) decrPending(recvSize int) {
   571  	peer.numPending--
   572  	if peer.numPending == 0 {
   573  		peer.timeout.Stop()
   574  	} else {
   575  		peer.recvMonitor.Update(recvSize)
   576  		peer.resetTimeout()
   577  	}
   578  }
   579  
   580  func (peer *bpPeer) onTimeout() {
   581  	peer.pool.mtx.Lock()
   582  	defer peer.pool.mtx.Unlock()
   583  
   584  	err := errors.New("peer did not send us anything")
   585  	peer.pool.sendError(err, peer.id)
   586  	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
   587  	peer.didTimeout = true
   588  }
   589  
   590  //-------------------------------------
   591  
   592  // bpRequester requests a block from a peer.
   593  //
   594  // If the height is within minBlocksForSingleRequest blocks of the pool's
   595  // height, it will send an additional request to another peer. This is to avoid
   596  // a situation where blocksync is stuck because of a single slow peer. Note
   597  // that it's okay to send a single request when the requested height is far
   598  // from the pool's height. If the peer is slow, it will time out and be replaced
   599  // with another peer.
   600  type bpRequester struct {
   601  	service.BaseService
   602  
   603  	pool        *BlockPool
   604  	height      int64
   605  	gotBlockCh  chan struct{}
   606  	redoCh      chan p2p.ID // redo may receive multiple messages; the peer ID identifies which request is being repeated
   607  	newHeightCh chan int64
   608  
   609  	mtx          cmtsync.Mutex
   610  	peerID       p2p.ID
   611  	secondPeerID p2p.ID // alternative peer to request from (if close to pool's height)
   612  	gotBlockFrom p2p.ID
   613  	block        *types.Block
   614  	extCommit    *types.ExtendedCommit
   615  }
   616  
   617  func newBPRequester(pool *BlockPool, height int64) *bpRequester {
   618  	bpr := &bpRequester{
   619  		pool:        pool,
   620  		height:      height,
   621  		gotBlockCh:  make(chan struct{}, 1),
   622  		redoCh:      make(chan p2p.ID, 1),
   623  		newHeightCh: make(chan int64, 1),
   624  
   625  		peerID:       "",
   626  		secondPeerID: "",
   627  		block:        nil,
   628  	}
   629  	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
   630  	return bpr
   631  }
   632  
   633  func (bpr *bpRequester) OnStart() error {
   634  	go bpr.requestRoutine()
   635  	return nil
   636  }
   637  
   638  // Returns true if the block came from one of the requested peers; the block is stored only if one wasn't set already.
   639  func (bpr *bpRequester) setBlock(block *types.Block, extCommit *types.ExtendedCommit, peerID p2p.ID) bool {
   640  	bpr.mtx.Lock()
   641  	if bpr.peerID != peerID && bpr.secondPeerID != peerID {
   642  		bpr.mtx.Unlock()
   643  		return false
   644  	}
   645  	if bpr.block != nil {
   646  		bpr.mtx.Unlock()
   647  		return true // getting a block from both peers is not an error
   648  	}
   649  
   650  	bpr.block = block
   651  	bpr.extCommit = extCommit
   652  	bpr.gotBlockFrom = peerID
   653  	bpr.mtx.Unlock()
   654  
   655  	select {
   656  	case bpr.gotBlockCh <- struct{}{}:
   657  	default:
   658  	}
   659  	return true
   660  }
   661  
   662  func (bpr *bpRequester) getBlock() *types.Block {
   663  	bpr.mtx.Lock()
   664  	defer bpr.mtx.Unlock()
   665  	return bpr.block
   666  }
   667  
   668  func (bpr *bpRequester) getExtendedCommit() *types.ExtendedCommit {
   669  	bpr.mtx.Lock()
   670  	defer bpr.mtx.Unlock()
   671  	return bpr.extCommit
   672  }
   673  
   674  // Returns the IDs of peers we've requested a block from.
   675  func (bpr *bpRequester) requestedFrom() []p2p.ID {
   676  	bpr.mtx.Lock()
   677  	defer bpr.mtx.Unlock()
   678  	peerIDs := make([]p2p.ID, 0, 2)
   679  	if bpr.peerID != "" {
   680  		peerIDs = append(peerIDs, bpr.peerID)
   681  	}
   682  	if bpr.secondPeerID != "" {
   683  		peerIDs = append(peerIDs, bpr.secondPeerID)
   684  	}
   685  	return peerIDs
   686  }
   687  
   688  // Returns true if we've requested a block from the given peer.
   689  func (bpr *bpRequester) didRequestFrom(peerID p2p.ID) bool {
   690  	bpr.mtx.Lock()
   691  	defer bpr.mtx.Unlock()
   692  	return bpr.peerID == peerID || bpr.secondPeerID == peerID
   693  }
   694  
   695  // Returns the ID of the peer who sent us the block.
   696  func (bpr *bpRequester) gotBlockFromPeerID() p2p.ID {
   697  	bpr.mtx.Lock()
   698  	defer bpr.mtx.Unlock()
   699  	return bpr.gotBlockFrom
   700  }
   701  
   702  // Removes the block (IF we got it from the given peer) and resets the peer.
   703  func (bpr *bpRequester) reset(peerID p2p.ID) (removedBlock bool) {
   704  	bpr.mtx.Lock()
   705  	defer bpr.mtx.Unlock()
   706  
   707  	// Only remove the block if we got it from that peer.
   708  	if bpr.gotBlockFrom == peerID {
   709  		bpr.block = nil
   710  		bpr.extCommit = nil
   711  		bpr.gotBlockFrom = ""
   712  		removedBlock = true
   713  		atomic.AddInt32(&bpr.pool.numPending, 1)
   714  	}
   715  
   716  	if bpr.peerID == peerID {
   717  		bpr.peerID = ""
   718  	} else {
   719  		bpr.secondPeerID = ""
   720  	}
   721  
   722  	return removedBlock
   723  }
   724  
   725  // Tells bpRequester to pick another peer and try again.
   726  // NOTE: Nonblocking, and does nothing if another redo
   727  // was already requested.
   728  func (bpr *bpRequester) redo(peerID p2p.ID) {
   729  	select {
   730  	case bpr.redoCh <- peerID:
   731  	default:
   732  	}
   733  }
   734  
   735  func (bpr *bpRequester) pickPeerAndSendRequest() {
   736  	bpr.mtx.Lock()
   737  	secondPeerID := bpr.secondPeerID
   738  	bpr.mtx.Unlock()
   739  
   740  	var peer *bpPeer
   741  PICK_PEER_LOOP:
   742  	for {
   743  		if !bpr.IsRunning() || !bpr.pool.IsRunning() {
   744  			return
   745  		}
   746  		peer = bpr.pool.pickIncrAvailablePeer(bpr.height, secondPeerID)
   747  		if peer == nil {
   748  			bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
   749  			time.Sleep(requestIntervalMS * time.Millisecond)
   750  			continue PICK_PEER_LOOP
   751  		}
   752  		break PICK_PEER_LOOP
   753  	}
   754  	bpr.mtx.Lock()
   755  	bpr.peerID = peer.id
   756  	bpr.mtx.Unlock()
   757  
   758  	bpr.pool.sendRequest(bpr.height, peer.id)
   759  }
   760  
   761  // Picks a second peer and sends a request to it. If the second peer is already
   762  // set, does nothing.
   763  func (bpr *bpRequester) pickSecondPeerAndSendRequest() (picked bool) {
   764  	bpr.mtx.Lock()
   765  	if bpr.secondPeerID != "" {
   766  		bpr.mtx.Unlock()
   767  		return false
   768  	}
   769  	peerID := bpr.peerID
   770  	bpr.mtx.Unlock()
   771  
   772  	secondPeer := bpr.pool.pickIncrAvailablePeer(bpr.height, peerID)
   773  	if secondPeer != nil {
   774  		bpr.mtx.Lock()
   775  		bpr.secondPeerID = secondPeer.id
   776  		bpr.mtx.Unlock()
   777  
   778  		bpr.pool.sendRequest(bpr.height, secondPeer.id)
   779  		return true
   780  	}
   781  
   782  	return false
   783  }
   784  
   785  // Informs the requester of the pool's new height.
   786  func (bpr *bpRequester) newHeight(height int64) {
   787  	select {
   788  	case bpr.newHeightCh <- height:
   789  	default:
   790  	}
   791  }
   792  
   793  // Responsible for making more requests as necessary.
   794  // Returns only when the requester or the pool is stopped.
   795  func (bpr *bpRequester) requestRoutine() {
   796  	gotBlock := false
   797  
   798  OUTER_LOOP:
   799  	for {
   800  		bpr.pickPeerAndSendRequest()
   801  
   802  		poolHeight := bpr.pool.Height()
   803  		if bpr.height-poolHeight < minBlocksForSingleRequest {
   804  			bpr.pickSecondPeerAndSendRequest()
   805  		}
   806  
   807  		retryTimer := time.NewTimer(requestRetrySeconds * time.Second)
   808  		defer retryTimer.Stop()
   809  
   810  		for {
   811  			select {
   812  			case <-bpr.pool.Quit():
   813  				if err := bpr.Stop(); err != nil {
   814  					bpr.Logger.Error("Error stopping requester", "err", err)
   815  				}
   816  				return
   817  			case <-bpr.Quit():
   818  				return
   819  			case <-retryTimer.C:
   820  				if !gotBlock {
   821  					bpr.Logger.Debug("Retrying block request(s) after timeout", "height", bpr.height, "peer", bpr.peerID, "secondPeerID", bpr.secondPeerID)
   822  					bpr.reset(bpr.peerID)
   823  					bpr.reset(bpr.secondPeerID)
   824  					continue OUTER_LOOP
   825  				}
   826  			case peerID := <-bpr.redoCh:
   827  				if bpr.didRequestFrom(peerID) {
   828  					removedBlock := bpr.reset(peerID)
   829  					if removedBlock {
   830  						gotBlock = false
   831  					}
   832  				}
   833  				// If both peers returned NoBlockResponse or bad block, reschedule both
   834  				// requests. If not, wait for the other peer.
   835  				if len(bpr.requestedFrom()) == 0 {
   836  					retryTimer.Stop()
   837  					continue OUTER_LOOP
   838  				}
   839  			case newHeight := <-bpr.newHeightCh:
   840  				if !gotBlock && bpr.height-newHeight < minBlocksForSingleRequest {
   841  					// The operation is a no-op if the second peer is already set. The cost is checking a mutex.
   842  					//
   843  					// If the second peer was just set, reset the retryTimer to give the
   844  					// second peer a chance to respond.
   845  					if picked := bpr.pickSecondPeerAndSendRequest(); picked {
   846  						if !retryTimer.Stop() {
   847  							<-retryTimer.C
   848  						}
   849  						retryTimer.Reset(requestRetrySeconds * time.Second)
   850  					}
   851  				}
   852  			case <-bpr.gotBlockCh:
   853  				gotBlock = true
   854  				// We got a block!
   855  				// Continue the for-loop and wait until Quit.
   856  			}
   857  		}
   858  	}
   859  }
   860  
   861  // BlockRequest stores a block request identified by the block Height and the PeerID responsible for
   862  // delivering the block.
   863  type BlockRequest struct {
   864  	Height int64
   865  	PeerID p2p.ID
   866  }