github.com/okex/exchain@v1.8.0/libs/tendermint/blockchain/v0/pool.go (about)

     1  package v0
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"sync"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	flow "github.com/okex/exchain/libs/tendermint/libs/flowrate"
    12  	"github.com/okex/exchain/libs/tendermint/libs/log"
    13  	"github.com/okex/exchain/libs/tendermint/libs/service"
    14  	"github.com/okex/exchain/libs/tendermint/p2p"
    15  	"github.com/okex/exchain/libs/tendermint/types"
    16  )
    17  
    18  /*
    19  eg, L = latency = 0.1s
    20  	P = num peers = 10
    21  	FN = num full nodes
    22  	BS = 1kB block size
    23  	CB = 1 Mbit/s = 128 kB/s
    24  	CB/P = 12.8 kB
    25  	B/S = CB/P/BS = 12.8 blocks/s
    26  
    27  	12.8 * 0.1 = 1.28 blocks on conn
    28  */
    29  
const (
	// Polling and capacity limits used by the pool:
	//   - requestIntervalMS: sleep, in milliseconds, between polling attempts
	//     (it is multiplied by time.Millisecond wherever it is used).
	//   - maxTotalRequesters: upper bound on len(pool.requesters) before the
	//     requester-spawning loop backs off.
	//   - maxPendingRequests: upper bound on pool.numPending before the
	//     requester-spawning loop backs off.
	//   - maxPendingRequestsPerPeer: a peer with this many pending requests is
	//     skipped when picking a peer for a new request.
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height. An unexpected
	// block further away than this from pool.height is reported as a peer error.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

// peerTimeout is the delay of the per-peer timer armed while the peer has
// pending requests; when it fires the peer is marked as timed out.
var peerTimeout = 15 * time.Second // not const so we can override with tests
    49  
    50  /*
    51  	Peers self report their heights when we join the block pool.
    52  	Starting from our latest pool.height, we request blocks
    53  	in sequence from peers that reported higher heights than ours.
    54  	Every so often we ask peers what height they're on so we can keep going.
    55  
    56  	Requests are continuously made for blocks of higher heights until
    57  	the limit is reached. If most of the requests have no available peers, and we
    58  	are not at peer limits, we can probably switch to consensus reactor
    59  */
    60  
// BlockPool keeps track of the fast sync peers, block requests and block responses.
//
// Methods in this file take mtx before touching requesters, height, peers
// and maxPeerHeight. numPending is updated through sync/atomic.
type BlockPool struct {
	service.BaseService
	startTime time.Time // recorded in OnStart; read by IsCaughtUp

	mtx sync.Mutex
	// block requests
	requesters map[int64]*bpRequester // one requester per block height being fetched
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest // outgoing block requests (see sendRequest)
	errorsCh   chan<- peerError    // outgoing peer errors (see sendError)
}
    80  
    81  // NewBlockPool returns a new BlockPool with the height equal to start. Block
    82  // requests and errors will be sent to requestsCh and errorsCh accordingly.
    83  func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    84  	bp := &BlockPool{
    85  		peers: make(map[p2p.ID]*bpPeer),
    86  
    87  		requesters: make(map[int64]*bpRequester),
    88  		height:     start,
    89  		numPending: 0,
    90  
    91  		requestsCh: requestsCh,
    92  		errorsCh:   errorsCh,
    93  	}
    94  	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
    95  	return bp
    96  }
    97  
    98  func (pool *BlockPool) SetHeight(height int64) {
    99  	pool.mtx.Lock()
   100  	defer pool.mtx.Unlock()
   101  
   102  	pool.height = height
   103  }
   104  
   105  // OnStart implements service.Service by spawning requesters routine and recording
   106  // pool's start time.
   107  func (pool *BlockPool) OnStart() error {
   108  	go pool.makeRequestersRoutine()
   109  	pool.startTime = time.Now()
   110  	return nil
   111  }
   112  
   113  func (pool *BlockPool) OnReset() error {
   114  	// clear up all requesters
   115  	pool.mtx.Lock()
   116  	defer pool.mtx.Unlock()
   117  
   118  	for height, r := range pool.requesters {
   119  		r.Stop()
   120  		delete(pool.requesters, height)
   121  	}
   122  	pool.numPending = 0
   123  
   124  	return nil
   125  }
   126  
   127  // spawns requesters as needed
   128  func (pool *BlockPool) makeRequestersRoutine() {
   129  	for {
   130  		if !pool.IsRunning() {
   131  			break
   132  		}
   133  
   134  		_, numPending, lenRequesters := pool.GetStatus()
   135  		switch {
   136  		case numPending >= maxPendingRequests:
   137  			// sleep for a bit.
   138  			time.Sleep(requestIntervalMS * time.Millisecond)
   139  			// check for timed out peers
   140  			pool.removeTimedoutPeers()
   141  		case lenRequesters >= maxTotalRequesters:
   142  			// sleep for a bit.
   143  			time.Sleep(requestIntervalMS * time.Millisecond)
   144  			// check for timed out peers
   145  			pool.removeTimedoutPeers()
   146  		default:
   147  			// request for more blocks.
   148  			pool.makeNextRequester()
   149  		}
   150  	}
   151  }
   152  
   153  func (pool *BlockPool) removeTimedoutPeers() {
   154  	pool.mtx.Lock()
   155  	defer pool.mtx.Unlock()
   156  
   157  	for _, peer := range pool.peers {
   158  		if !peer.didTimeout && peer.numPending > 0 {
   159  			curRate := peer.recvMonitor.Status().CurRate
   160  			// curRate can be 0 on start
   161  			if curRate != 0 && curRate < minRecvRate {
   162  				err := errors.New("peer is not sending us data fast enough")
   163  				pool.sendError(err, peer.id)
   164  				pool.Logger.Error("SendTimeout", "peer", peer.id,
   165  					"reason", err,
   166  					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
   167  					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
   168  				peer.didTimeout = true
   169  			}
   170  		}
   171  		if peer.didTimeout {
   172  			pool.removePeer(peer.id)
   173  		}
   174  	}
   175  }
   176  
   177  // GetStatus returns pool's height, numPending requests and the number of
   178  // requesters.
   179  func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
   180  	pool.mtx.Lock()
   181  	defer pool.mtx.Unlock()
   182  
   183  	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
   184  }
   185  
   186  // IsCaughtUp returns true if this node is caught up, false - otherwise.
   187  // TODO: relax conditions, prevent abuse.
   188  func (pool *BlockPool) IsCaughtUp() bool {
   189  	pool.mtx.Lock()
   190  	defer pool.mtx.Unlock()
   191  
   192  	// Need at least 1 peer to be considered caught up.
   193  	if len(pool.peers) == 0 {
   194  		pool.Logger.Debug("Blockpool has no peers")
   195  		return false
   196  	}
   197  
   198  	// Some conditions to determine if we're caught up.
   199  	// Ensures we've either received a block or waited some amount of time,
   200  	// and that we're synced to the highest known height.
   201  	// Note we use maxPeerHeight - 1 because to sync block H requires block H+1
   202  	// to verify the LastCommit.
   203  	// TODO: should change judge conditions
   204  	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
   205  	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
   206  	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
   207  	return isCaughtUp
   208  }
   209  
   210  // PeekTwoBlocks returns blocks at pool.height and pool.height+1.
   211  // We need to see the second block's Commit to validate the first block.
   212  // So we peek two blocks at a time.
   213  // The caller will verify the commit.
   214  func (pool *BlockPool) PeekTwoBlocks() (first, second *types.Block, firstParts *types.PartSet) {
   215  	pool.mtx.Lock()
   216  	defer pool.mtx.Unlock()
   217  
   218  	if r := pool.requesters[pool.height]; r != nil {
   219  		first, firstParts = r.getBlock()
   220  	}
   221  	if r := pool.requesters[pool.height+1]; r != nil {
   222  		second, _ = r.getBlock()
   223  	}
   224  	return
   225  }
   226  
   227  // PopRequest pops the first block at pool.height.
   228  // It must have been validated by 'second'.Commit from PeekTwoBlocks().
   229  func (pool *BlockPool) PopRequest() {
   230  	pool.mtx.Lock()
   231  	defer pool.mtx.Unlock()
   232  
   233  	if r := pool.requesters[pool.height]; r != nil {
   234  		/*  The block can disappear at any time, due to removePeer().
   235  		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
   236  			PanicSanity("PopRequest() requires a valid block")
   237  		}
   238  		*/
   239  		r.Stop()
   240  		delete(pool.requesters, pool.height)
   241  		pool.height++
   242  	} else {
   243  		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
   244  	}
   245  }
   246  
   247  // RedoRequest invalidates the block at pool.height,
   248  // Remove the peer and redo request from others.
   249  // Returns the ID of the removed peer.
   250  func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
   251  	pool.mtx.Lock()
   252  	defer pool.mtx.Unlock()
   253  
   254  	request := pool.requesters[height]
   255  	peerID := request.getPeerID()
   256  	if peerID != p2p.ID("") {
   257  		// RemovePeer will redo all requesters associated with this peer.
   258  		pool.removePeer(peerID)
   259  	}
   260  	return peerID
   261  }
   262  
   263  // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it.
   264  // TODO: ensure that blocks come in order for each peer.
   265  func (pool *BlockPool) AddBlock(peerID p2p.ID, msg *bcBlockResponseMessage, blockSize int) {
   266  	pool.mtx.Lock()
   267  	defer pool.mtx.Unlock()
   268  
   269  	block := msg.Block
   270  	requester := pool.requesters[block.Height]
   271  	if requester == nil {
   272  		pool.Logger.Info(
   273  			"peer sent us a block we didn't expect",
   274  			"peer",
   275  			peerID,
   276  			"curHeight",
   277  			pool.height,
   278  			"blockHeight",
   279  			block.Height)
   280  		diff := pool.height - block.Height
   281  		if diff < 0 {
   282  			diff *= -1
   283  		}
   284  		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
   285  			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
   286  		}
   287  		return
   288  	}
   289  
   290  	if requester.setBlock(block, msg.ExInfo, peerID) {
   291  		atomic.AddInt32(&pool.numPending, -1)
   292  		peer := pool.peers[peerID]
   293  		if peer != nil {
   294  			peer.decrPending(blockSize)
   295  		}
   296  	} else {
   297  		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
   298  		pool.sendError(errors.New("invalid peer"), peerID)
   299  	}
   300  }
   301  
   302  // MaxPeerHeight returns the highest reported height.
   303  func (pool *BlockPool) MaxPeerHeight() int64 {
   304  	pool.mtx.Lock()
   305  	defer pool.mtx.Unlock()
   306  	return pool.maxPeerHeight
   307  }
   308  
   309  // SetPeerRange sets the peer's alleged blockchain base and height.
   310  func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64, storeHeight int64) bool {
   311  	pool.mtx.Lock()
   312  	defer pool.mtx.Unlock()
   313  
   314  	peer := pool.peers[peerID]
   315  	if peer != nil {
   316  		peer.base = base
   317  		peer.height = height
   318  	} else {
   319  		peer = newBPPeer(pool, peerID, base, height)
   320  		peer.setLogger(pool.Logger.With("peer", peerID))
   321  		pool.peers[peerID] = peer
   322  	}
   323  
   324  	if height > pool.maxPeerHeight {
   325  		pool.maxPeerHeight = height
   326  	}
   327  
   328  	// compute how many peers' height is greater than height
   329  	if !pool.IsRunning() && storeHeight+MaxIntervalForFastSync <= height {
   330  		return true
   331  	}
   332  
   333  	return false
   334  }
   335  
   336  // RemovePeer removes the peer with peerID from the pool. If there's no peer
   337  // with peerID, function is a no-op.
   338  func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
   339  	pool.mtx.Lock()
   340  	defer pool.mtx.Unlock()
   341  
   342  	pool.removePeer(peerID)
   343  }
   344  
   345  func (pool *BlockPool) removePeer(peerID p2p.ID) {
   346  	for _, requester := range pool.requesters {
   347  		if requester.getPeerID() == peerID {
   348  			requester.redo(peerID)
   349  		}
   350  	}
   351  
   352  	peer, ok := pool.peers[peerID]
   353  	if ok {
   354  		if peer.timeout != nil {
   355  			peer.timeout.Stop()
   356  		}
   357  
   358  		delete(pool.peers, peerID)
   359  
   360  		// Find a new peer with the biggest height and update maxPeerHeight if the
   361  		// peer's height was the biggest.
   362  		if peer.height == pool.maxPeerHeight {
   363  			pool.updateMaxPeerHeight()
   364  		}
   365  	}
   366  }
   367  
   368  // If no peers are left, maxPeerHeight is set to 0.
   369  func (pool *BlockPool) updateMaxPeerHeight() {
   370  	var max int64
   371  	for _, peer := range pool.peers {
   372  		if peer.height > max {
   373  			max = peer.height
   374  		}
   375  	}
   376  	pool.maxPeerHeight = max
   377  }
   378  
   379  // Pick an available peer with the given height available.
   380  // If no peers are available, returns nil.
   381  func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
   382  	pool.mtx.Lock()
   383  	defer pool.mtx.Unlock()
   384  
   385  	for _, peer := range pool.peers {
   386  		if peer.didTimeout {
   387  			pool.removePeer(peer.id)
   388  			continue
   389  		}
   390  		if peer.numPending >= maxPendingRequestsPerPeer {
   391  			continue
   392  		}
   393  		if height < peer.base || height > peer.height {
   394  			continue
   395  		}
   396  		peer.incrPending()
   397  		return peer
   398  	}
   399  	return nil
   400  }
   401  
   402  func (pool *BlockPool) makeNextRequester() {
   403  	pool.mtx.Lock()
   404  	defer pool.mtx.Unlock()
   405  
   406  	nextHeight := pool.height + pool.requestersLen()
   407  	if nextHeight > pool.maxPeerHeight {
   408  		return
   409  	}
   410  
   411  	request := newBPRequester(pool, nextHeight)
   412  
   413  	pool.requesters[nextHeight] = request
   414  	atomic.AddInt32(&pool.numPending, 1)
   415  
   416  	err := request.Start()
   417  	if err != nil {
   418  		request.Logger.Error("Error starting request", "err", err)
   419  	}
   420  }
   421  
   422  func (pool *BlockPool) requestersLen() int64 {
   423  	return int64(len(pool.requesters))
   424  }
   425  
   426  func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
   427  	if !pool.IsRunning() {
   428  		return
   429  	}
   430  	pool.requestsCh <- BlockRequest{height, peerID}
   431  }
   432  
   433  func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
   434  	if !pool.IsRunning() {
   435  		return
   436  	}
   437  	pool.errorsCh <- peerError{err, peerID}
   438  }
   439  
   440  // for debugging purposes
   441  //nolint:unused
   442  func (pool *BlockPool) debug() string {
   443  	pool.mtx.Lock()
   444  	defer pool.mtx.Unlock()
   445  
   446  	str := ""
   447  	nextHeight := pool.height + pool.requestersLen()
   448  	for h := pool.height; h < nextHeight; h++ {
   449  		if pool.requesters[h] == nil {
   450  			str += fmt.Sprintf("H(%v):X ", h)
   451  		} else {
   452  			str += fmt.Sprintf("H(%v):", h)
   453  			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
   454  		}
   455  	}
   456  	return str
   457  }
   458  
   459  //-------------------------------------
   460  
// bpPeer is the pool's bookkeeping record for one fast-sync peer.
type bpPeer struct {
	didTimeout  bool          // set when the peer is too slow or its timer fired; such peers get removed
	numPending  int32         // requests assigned to this peer and not yet answered
	height      int64         // height reported by the peer (see SetPeerRange)
	base        int64         // base reported by the peer (see SetPeerRange)
	pool        *BlockPool    // owning pool; its mutex is taken in onTimeout
	id          p2p.ID
	recvMonitor *flow.Monitor // receive-rate monitor, (re)created in resetMonitor

	timeout *time.Timer // fires onTimeout after peerTimeout; nil until first armed

	logger log.Logger
}
   474  
   475  func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
   476  	peer := &bpPeer{
   477  		pool:       pool,
   478  		id:         peerID,
   479  		base:       base,
   480  		height:     height,
   481  		numPending: 0,
   482  		logger:     log.NewNopLogger(),
   483  	}
   484  	return peer
   485  }
   486  
// setLogger replaces the peer's logger with l.
func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}
   490  
   491  func (peer *bpPeer) resetMonitor() {
   492  	peer.recvMonitor = flow.New(time.Second, time.Second*40)
   493  	initialValue := float64(minRecvRate) * math.E
   494  	peer.recvMonitor.SetREMA(initialValue)
   495  }
   496  
   497  func (peer *bpPeer) resetTimeout() {
   498  	if peer.timeout == nil {
   499  		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
   500  	} else {
   501  		peer.timeout.Reset(peerTimeout)
   502  	}
   503  }
   504  
   505  func (peer *bpPeer) incrPending() {
   506  	if peer.numPending == 0 {
   507  		peer.resetMonitor()
   508  		peer.resetTimeout()
   509  	}
   510  	peer.numPending++
   511  }
   512  
   513  func (peer *bpPeer) decrPending(recvSize int) {
   514  	peer.numPending--
   515  	if peer.numPending == 0 {
   516  		peer.timeout.Stop()
   517  	} else {
   518  		peer.recvMonitor.Update(recvSize)
   519  		peer.resetTimeout()
   520  	}
   521  }
   522  
   523  func (peer *bpPeer) onTimeout() {
   524  	peer.pool.mtx.Lock()
   525  	defer peer.pool.mtx.Unlock()
   526  
   527  	err := errors.New("peer did not send us anything")
   528  	peer.pool.sendError(err, peer.id)
   529  	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
   530  	peer.didTimeout = true
   531  }
   532  
   533  //-------------------------------------
   534  
// bpRequester is a service responsible for obtaining the block at one
// height. Its requestRoutine picks a peer, asks the pool to send the request
// and waits for a block, a redo or a quit signal.
type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64         // height of the block this requester fetches
	gotBlockCh chan struct{} // signalled (non-blocking) by setBlock; buffered with capacity 1
	redoCh     chan p2p.ID   // redo may be sent multiple times; the peer ID identifies repeats

	mtx        sync.Mutex // guards peerID, block and blockParts in the accessors below
	peerID     p2p.ID     // peer currently assigned to this request; "" when none
	block      *types.Block
	blockParts *types.PartSet // part set built from block in setBlock
}
   547  
   548  func newBPRequester(pool *BlockPool, height int64) *bpRequester {
   549  	bpr := &bpRequester{
   550  		pool:       pool,
   551  		height:     height,
   552  		gotBlockCh: make(chan struct{}, 1),
   553  		redoCh:     make(chan p2p.ID, 1),
   554  
   555  		peerID: "",
   556  		block:  nil,
   557  	}
   558  	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
   559  	return bpr
   560  }
   561  
// OnStart launches requestRoutine in its own goroutine. It always returns nil.
func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}
   566  
   567  // Returns true if the peer matches and block doesn't already exist.
   568  func (bpr *bpRequester) setBlock(block *types.Block, exInfo *types.BlockExInfo, peerID p2p.ID) bool {
   569  	bpr.mtx.Lock()
   570  	if bpr.block != nil || bpr.peerID != peerID {
   571  		bpr.mtx.Unlock()
   572  		return false
   573  	}
   574  	bpr.block = block
   575  	bpr.blockParts = block.MakePartSetByExInfo(exInfo)
   576  
   577  	bpr.mtx.Unlock()
   578  
   579  	select {
   580  	case bpr.gotBlockCh <- struct{}{}:
   581  	default:
   582  	}
   583  	return true
   584  }
   585  
   586  func (bpr *bpRequester) getBlock() (*types.Block, *types.PartSet) {
   587  	bpr.mtx.Lock()
   588  	defer bpr.mtx.Unlock()
   589  	return bpr.block, bpr.blockParts
   590  }
   591  
   592  func (bpr *bpRequester) getPeerID() p2p.ID {
   593  	bpr.mtx.Lock()
   594  	defer bpr.mtx.Unlock()
   595  	return bpr.peerID
   596  }
   597  
   598  // This is called from the requestRoutine, upon redo().
   599  func (bpr *bpRequester) reset() {
   600  	bpr.mtx.Lock()
   601  	defer bpr.mtx.Unlock()
   602  
   603  	if bpr.block != nil {
   604  		atomic.AddInt32(&bpr.pool.numPending, 1)
   605  	}
   606  
   607  	bpr.peerID = ""
   608  	bpr.block = nil
   609  }
   610  
// redo tells the bpRequester to pick another peer and try again, on behalf
// of the given peerID.
// NOTE: Nonblocking. redoCh has capacity 1, so the call does nothing if
// another redo is already queued.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}
   620  
// requestRoutine is responsible for making more requests as necessary.
//
// It repeatedly (1) polls the pool for an available peer, (2) records that
// peer as the assigned one and asks the pool to send the block request, and
// (3) waits. While waiting, a redo for the currently assigned peer resets
// the requester and goes back to step 1; a redo for any other peer is
// ignored. Receiving the block does NOT end the routine: it keeps waiting.
// The routine returns only when the requester or the pool stops.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			// Bail out as soon as either this requester or the pool is stopped.
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				// No peer can serve this height right now; retry shortly.
				//log.Info("No peers available", "height", height)
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		// Publish the assignment under the mutex so setBlock/getPeerID see it.
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				// The pool is shutting down: stop this requester too.
				bpr.Stop()
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
				// Only honour a redo aimed at the peer we are currently
				// assigned to; anything else is a stale or repeated signal.
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				} else {
					continue WAIT_LOOP
				}
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait til Quit.
				continue WAIT_LOOP
			}
		}
	}
}
   670  
// BlockRequest stores a block request identified by the block Height and the PeerID responsible for
// delivering the block. Values of this type are what the pool sends on its
// requests channel.
type BlockRequest struct {
	Height int64  // height of the requested block
	PeerID p2p.ID // peer the request is addressed to
}