github.com/DFWallet/tendermint-cosmos@v0.0.2/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync/atomic"
	"time"

	flow "github.com/DFWallet/tendermint-cosmos/libs/flowrate"
	"github.com/DFWallet/tendermint-cosmos/libs/log"
	"github.com/DFWallet/tendermint-cosmos/libs/service"
	tmsync "github.com/DFWallet/tendermint-cosmos/libs/sync"
	"github.com/DFWallet/tendermint-cosmos/p2p"
	"github.com/DFWallet/tendermint-cosmos/types"
)

/*
Example throughput estimate:

	L  = latency = 0.1s
	P  = num peers = 10
	FN = num full nodes
	BS = block size = 1 kB
	CB = conn bandwidth = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s per peer
	B/S  = CB/P/BS = 12.8 blocks/s per peer

	12.8 blocks/s * 0.1s latency = 1.28 blocks in flight per conn
*/

const (
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a (rather slow) DSL connection with 128 Kbps upload ~ 15 KB/s,
	// sending data across the Atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)
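
// Back-of-the-envelope intuition for the constants above (a sketch, not
// enforced anywhere else in this file): at most maxTotalRequesters = 600
// blocks may be in flight at once, at most 20 of them assigned to any single
// peer, and once a cap is hit the spawn loop below polls every
// requestIntervalMS = 2ms for free slots and timed-out peers. At the ~1 kB
// block size assumed in the estimate above, a full window of 600 pending
// blocks is roughly 600 kB of outstanding data, and minRecvRate = 7680 B/s is
// the slowest per-peer receive rate tolerated before disconnecting.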

var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
	Peers self-report their heights when we join the block pool.
	Starting from our latest pool.height, we request blocks
	in sequence from peers that reported higher heights than ours.
	Every so often we ask peers what height they're on so we can keep going.

	Requests are continuously made for blocks of higher heights until
	the limit is reached. If most of the requests have no available peers, and we
	are not at peer limits, we can probably switch to the consensus reactor.
*/
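
// In terms of wire traffic, the pool sits behind a reactor (not shown in this
// file) that, roughly speaking, drives the following exchange. The message
// names here are illustrative of the surrounding blockchain reactor, not
// anything defined in this file:
//
//	peer -> us: status response (base, height)  -> pool.SetPeerRange
//	us -> peer: block request {Height, PeerID}  <- drained from requestsCh
//	peer -> us: block response (block)          -> pool.AddBlock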

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	startTime time.Time

	mtx tmsync.Mutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[p2p.ID]*bpPeer),

		requesters: make(map[int64]*bpRequester),
		height:     start,
		numPending: 0,

		requestsCh: requestsCh,
		errorsCh:   errorsCh,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}
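
// A minimal sketch of how a caller might wire the pool up (illustrative only;
// in practice this is done by the blockchain reactor, and the error channel
// buffer size here is made up):
//
//	requestsCh := make(chan BlockRequest, maxTotalRequesters)
//	errorsCh := make(chan peerError, 1000)
//	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
//	pool.SetLogger(logger.With("module", "blockchain"))
//	if err := pool.Start(); err != nil {
//		// handle startup failure
//	}
//	// ...then drain requestsCh/errorsCh and dispatch to peers.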

// OnStart implements service.Service by spawning requesters routine and recording
// pool's start time.
func (pool *BlockPool) OnStart() error {
	go pool.makeRequestersRoutine()
	pool.startTime = time.Now()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			break
		}

		_, numPending, lenRequesters := pool.GetStatus()
		switch {
		case numPending >= maxPendingRequests || lenRequesters >= maxTotalRequesters:
			// sleep for a bit and check for timed out peers
			time.Sleep(requestIntervalMS * time.Millisecond)
			pool.removeTimedoutPeers()
		default:
			// request more blocks
			pool.makeNextRequester()
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}
		}
		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}
}

// GetStatus returns pool's height, numPending requests and the number of
// requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
// TODO: relax conditions, prevent abuse.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		pool.Logger.Debug("Blockpool has no peers")
		return false
	}

	// Some conditions to determine if we're caught up.
	// Ensures we've either received a block or waited some amount of time,
	// and that we're synced to the highest known height.
	// Note we use maxPeerHeight - 1 because to sync block H requires block H+1
	// to verify the LastCommit. For example, if maxPeerHeight is 100, we are
	// caught up once pool.height reaches 99: syncing block 100 would require
	// block 101, which no peer has yet.
	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
	return isCaughtUp
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}
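
// A sketch of the peek/verify/pop cycle a consumer might run (illustrative;
// the real verification lives in the blockchain reactor, and verifyCommit
// below is a hypothetical stand-in for it):
//
//	first, second := pool.PeekTwoBlocks()
//	if first == nil || second == nil {
//		return // not enough blocks yet; try again later
//	}
//	// second.LastCommit carries the signatures committing first.
//	if err := verifyCommit(first, second.LastCommit); err != nil {
//		// Drop the peer that sent the bad block and re-request it.
//		pool.RedoRequest(first.Height)
//		return
//	}
//	pool.PopRequest() // first is now accepted; pool.height advances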

// PopRequest pops the first block at pool.height.
// It must have already been validated by the second block's Commit from
// PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		/*  The block can disappear at any time, due to removePeer().
		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
			PanicSanity("PopRequest() requires a valid block")
		}
		*/
		if err := r.Stop(); err != nil {
			pool.Logger.Error("Error stopping requester", "err", err)
		}
		delete(pool.requesters, pool.height)
		pool.height++
	} else {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}
}

// RedoRequest invalidates the block at the given height, removes the peer
// that sent it, and re-requests the block from other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.getPeerID()
	if peerID != p2p.ID("") {
		// RemovePeer will redo all requesters associated with this peer.
		pool.removePeer(peerID)
	}
	return peerID
}

// AddBlock validates that the block comes from the peer it was expected from
// and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	requester := pool.requesters[block.Height]
	if requester == nil {
		pool.Logger.Info("peer sent us a block we didn't expect",
			"peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
		diff := pool.height - block.Height
		if diff < 0 {
			diff *= -1
		}
		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
		}
		return
	}

	if requester.setBlock(block, peerID) {
		atomic.AddInt32(&pool.numPending, -1)
		peer := pool.peers[peerID]
		if peer != nil {
			peer.decrPending(blockSize)
		}
	} else {
		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
		pool.sendError(errors.New("invalid peer"), peerID)
	}
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()
	return pool.maxPeerHeight
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}

// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
	for _, requester := range pool.requesters {
		if requester.getPeerID() == peerID {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)

		// If this peer reported the biggest height, recompute maxPeerHeight
		// from the remaining peers.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// updateMaxPeerHeight sets maxPeerHeight to the highest height reported by
// the remaining peers. If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Picks an available peer whose range (base through height) includes the
// given height, and increments its pending count.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}
	return nil
}

func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	nextHeight := pool.height + pool.requestersLen()
	if nextHeight > pool.maxPeerHeight {
		return
	}

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	err := request.Start()
	if err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) requestersLen() int64 {
	return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + pool.requestersLen()
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
		}
	}
	return str
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          p2p.ID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}

func (peer *bpPeer) resetMonitor() {
	// Track the receive rate with 1s samples over a 40s window.
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
	// Seed the exponential moving average above minRecvRate so a fresh peer
	// isn't immediately flagged as too slow before it has sent anything.
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		// First outstanding request: start rate monitoring and the timeout timer.
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		// Nothing outstanding: the peer cannot time out on us.
		peer.timeout.Stop()
	} else {
		// Record the received bytes and extend the deadline.
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}

//-------------------------------------

type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64
	gotBlockCh chan struct{}
	redoCh     chan p2p.ID // redo may be signalled multiple times; the peer ID identifies duplicates

	mtx    tmsync.Mutex
	peerID p2p.ID
	block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
	bpr := &bpRequester{
		pool:       pool,
		height:     height,
		gotBlockCh: make(chan struct{}, 1),
		redoCh:     make(chan p2p.ID, 1),

		peerID: "",
		block:  nil,
	}
	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
	return bpr
}

func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}

// Returns true if the peer matches and block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
	bpr.mtx.Lock()
	if bpr.block != nil || bpr.peerID != peerID {
		bpr.mtx.Unlock()
		return false
	}
	bpr.block = block
	bpr.mtx.Unlock()

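	// Notify requestRoutine without blocking: gotBlockCh has capacity 1, so if
	// a notification is already pending, dropping this one loses nothing.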
	select {
	case bpr.gotBlockCh <- struct{}{}:
	default:
	}
	return true
}

func (bpr *bpRequester) getBlock() *types.Block {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.block
}

func (bpr *bpRequester) getPeerID() p2p.ID {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	// If we had already received a block, it is being discarded, so the pool
	// must count this height as pending again.
	if bpr.block != nil {
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	bpr.peerID = ""
	bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}

// Responsible for making more requests as necessary.
// Returns when the requester or the pool is stopped; otherwise it keeps
// waiting for a block to arrive (i.e. for AddBlock() to be called).
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				// log.Info("No peers available", "height", height)
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				} else {
					// Stale redo for a previous peer; ignore it.
					continue WAIT_LOOP
				}
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait til Quit.
				continue WAIT_LOOP
			}
		}
	}
}

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}
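
// For completeness, a sketch of how the requestsCh/errorsCh side might be
// drained by the surrounding reactor (illustrative only; sendBlockRequest and
// handlePeerError are hypothetical stand-ins for the real peer plumbing):
//
//	for {
//		select {
//		case request := <-requestsCh:
//			// Forward the request to the peer the pool picked.
//			sendBlockRequest(request.PeerID, request.Height)
//		case err := <-errorsCh:
//			// The pool wants this peer disconnected.
//			handlePeerError(err)
//		}
//	}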