github.com/evdatsion/aphelion-dpos-bft@v0.32.1/blockchain/pool.go (about)

     1  package blockchain
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"sync"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common"
    12  	flow "github.com/evdatsion/aphelion-dpos-bft/libs/flowrate"
    13  	"github.com/evdatsion/aphelion-dpos-bft/libs/log"
    14  
    15  	"github.com/evdatsion/aphelion-dpos-bft/p2p"
    16  	"github.com/evdatsion/aphelion-dpos-bft/types"
    17  )
    18  
    19  /*
    20  eg, L = latency = 0.1s
    21  	P = num peers = 10
    22  	FN = num full nodes
    23  	BS = 1kB block size
    24  	CB = 1 Mbit/s = 128 kB/s
    25  	CB/P = 12.8 kB
    26  	B/S = CB/P/BS = 12.8 blocks/s
    27  
    28  	12.8 * 0.1 = 1.28 blocks on conn
    29  */
    30  
const (
	// Sleep, in milliseconds, between polling attempts in the requester
	// loops (used as requestIntervalMS * time.Millisecond).
	requestIntervalMS         = 2
	// Upper bound on the number of live requesters (len(pool.requesters)).
	maxTotalRequesters        = 600
	// Upper bound on pool.numPending before new requesters stop being made.
	maxPendingRequests        = maxTotalRequesters
	// Upper bound on a single peer's numPending before it is skipped when
	// picking a peer for a request.
	maxPendingRequestsPerPeer = 20

	// Minimum receive rate (7680 = 7.5 * 1024) required to consider that a
	// peer is sending us blocks fast enough. If a peer with pending requests
	// sends data below this rate, we treat it as timed out and remove it.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum allowed absolute difference between the pool's current height
	// and the height of an unexpected block before the sender is reported.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)
    48  
// peerTimeout is the duration used for each peer's timer (see
// bpPeer.resetTimeout); when the timer fires, bpPeer.onTimeout marks the peer
// as timed out.
var peerTimeout = 15 * time.Second // not const so we can override with tests
    50  
    51  /*
    52  	Peers self report their heights when we join the block pool.
    53  	Starting from our latest pool.height, we request blocks
    54  	in sequence from peers that reported higher heights than ours.
    55  	Every so often we ask peers what height they're on so we can keep going.
    56  
    57  	Requests are continuously made for blocks of higher heights until
    58  	the limit is reached. If most of the requests have no available peers, and we
    59  	are not at peer limits, we can probably switch to consensus reactor
    60  */
    61  
// BlockPool tracks the peers that reported a height, owns one bpRequester per
// block height it is currently trying to fetch, and publishes block requests
// and peer errors on the channels supplied to NewBlockPool.
type BlockPool struct {
	cmn.BaseService
	startTime time.Time // set in OnStart; read by IsCaughtUp

	// mtx guards the requesters/height and peers/maxPeerHeight fields below.
	mtx sync.Mutex
	// block requests
	requesters map[int64]*bpRequester // keyed by block height
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest // outgoing block requests (see sendRequest)
	errorsCh   chan<- peerError    // outgoing peer errors (see sendError)
}
    80  
    81  // NewBlockPool returns a new BlockPool with the height equal to start. Block
    82  // requests and errors will be sent to requestsCh and errorsCh accordingly.
    83  func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    84  	bp := &BlockPool{
    85  		peers: make(map[p2p.ID]*bpPeer),
    86  
    87  		requesters: make(map[int64]*bpRequester),
    88  		height:     start,
    89  		numPending: 0,
    90  
    91  		requestsCh: requestsCh,
    92  		errorsCh:   errorsCh,
    93  	}
    94  	bp.BaseService = *cmn.NewBaseService(nil, "BlockPool", bp)
    95  	return bp
    96  }
    97  
    98  // OnStart implements cmn.Service by spawning requesters routine and recording
    99  // pool's start time.
   100  func (pool *BlockPool) OnStart() error {
   101  	go pool.makeRequestersRoutine()
   102  	pool.startTime = time.Now()
   103  	return nil
   104  }
   105  
   106  // spawns requesters as needed
   107  func (pool *BlockPool) makeRequestersRoutine() {
   108  	for {
   109  		if !pool.IsRunning() {
   110  			break
   111  		}
   112  
   113  		_, numPending, lenRequesters := pool.GetStatus()
   114  		if numPending >= maxPendingRequests {
   115  			// sleep for a bit.
   116  			time.Sleep(requestIntervalMS * time.Millisecond)
   117  			// check for timed out peers
   118  			pool.removeTimedoutPeers()
   119  		} else if lenRequesters >= maxTotalRequesters {
   120  			// sleep for a bit.
   121  			time.Sleep(requestIntervalMS * time.Millisecond)
   122  			// check for timed out peers
   123  			pool.removeTimedoutPeers()
   124  		} else {
   125  			// request for more blocks.
   126  			pool.makeNextRequester()
   127  		}
   128  	}
   129  }
   130  
   131  func (pool *BlockPool) removeTimedoutPeers() {
   132  	pool.mtx.Lock()
   133  	defer pool.mtx.Unlock()
   134  
   135  	for _, peer := range pool.peers {
   136  		if !peer.didTimeout && peer.numPending > 0 {
   137  			curRate := peer.recvMonitor.Status().CurRate
   138  			// curRate can be 0 on start
   139  			if curRate != 0 && curRate < minRecvRate {
   140  				err := errors.New("peer is not sending us data fast enough")
   141  				pool.sendError(err, peer.id)
   142  				pool.Logger.Error("SendTimeout", "peer", peer.id,
   143  					"reason", err,
   144  					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
   145  					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
   146  				peer.didTimeout = true
   147  			}
   148  		}
   149  		if peer.didTimeout {
   150  			pool.removePeer(peer.id)
   151  		}
   152  	}
   153  }
   154  
   155  // GetStatus returns pool's height, numPending requests and the number of
   156  // requesters.
   157  func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
   158  	pool.mtx.Lock()
   159  	defer pool.mtx.Unlock()
   160  
   161  	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
   162  }
   163  
   164  // IsCaughtUp returns true if this node is caught up, false - otherwise.
   165  // TODO: relax conditions, prevent abuse.
   166  func (pool *BlockPool) IsCaughtUp() bool {
   167  	pool.mtx.Lock()
   168  	defer pool.mtx.Unlock()
   169  
   170  	// Need at least 1 peer to be considered caught up.
   171  	if len(pool.peers) == 0 {
   172  		pool.Logger.Debug("Blockpool has no peers")
   173  		return false
   174  	}
   175  
   176  	// Some conditions to determine if we're caught up.
   177  	// Ensures we've either received a block or waited some amount of time,
   178  	// and that we're synced to the highest known height.
   179  	// Note we use maxPeerHeight - 1 because to sync block H requires block H+1
   180  	// to verify the LastCommit.
   181  	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
   182  	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
   183  	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
   184  	return isCaughtUp
   185  }
   186  
   187  // We need to see the second block's Commit to validate the first block.
   188  // So we peek two blocks at a time.
   189  // The caller will verify the commit.
   190  func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
   191  	pool.mtx.Lock()
   192  	defer pool.mtx.Unlock()
   193  
   194  	if r := pool.requesters[pool.height]; r != nil {
   195  		first = r.getBlock()
   196  	}
   197  	if r := pool.requesters[pool.height+1]; r != nil {
   198  		second = r.getBlock()
   199  	}
   200  	return
   201  }
   202  
   203  // Pop the first block at pool.height
   204  // It must have been validated by 'second'.Commit from PeekTwoBlocks().
   205  func (pool *BlockPool) PopRequest() {
   206  	pool.mtx.Lock()
   207  	defer pool.mtx.Unlock()
   208  
   209  	if r := pool.requesters[pool.height]; r != nil {
   210  		/*  The block can disappear at any time, due to removePeer().
   211  		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
   212  			PanicSanity("PopRequest() requires a valid block")
   213  		}
   214  		*/
   215  		r.Stop()
   216  		delete(pool.requesters, pool.height)
   217  		pool.height++
   218  	} else {
   219  		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
   220  	}
   221  }
   222  
   223  // Invalidates the block at pool.height,
   224  // Remove the peer and redo request from others.
   225  // Returns the ID of the removed peer.
   226  func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
   227  	pool.mtx.Lock()
   228  	defer pool.mtx.Unlock()
   229  
   230  	request := pool.requesters[height]
   231  	peerID := request.getPeerID()
   232  	if peerID != p2p.ID("") {
   233  		// RemovePeer will redo all requesters associated with this peer.
   234  		pool.removePeer(peerID)
   235  	}
   236  	return peerID
   237  }
   238  
   239  // TODO: ensure that blocks come in order for each peer.
   240  func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
   241  	pool.mtx.Lock()
   242  	defer pool.mtx.Unlock()
   243  
   244  	requester := pool.requesters[block.Height]
   245  	if requester == nil {
   246  		pool.Logger.Info("peer sent us a block we didn't expect", "peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
   247  		diff := pool.height - block.Height
   248  		if diff < 0 {
   249  			diff *= -1
   250  		}
   251  		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
   252  			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
   253  		}
   254  		return
   255  	}
   256  
   257  	if requester.setBlock(block, peerID) {
   258  		atomic.AddInt32(&pool.numPending, -1)
   259  		peer := pool.peers[peerID]
   260  		if peer != nil {
   261  			peer.decrPending(blockSize)
   262  		}
   263  	} else {
   264  		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
   265  		pool.sendError(errors.New("invalid peer"), peerID)
   266  	}
   267  }
   268  
   269  // MaxPeerHeight returns the highest reported height.
   270  func (pool *BlockPool) MaxPeerHeight() int64 {
   271  	pool.mtx.Lock()
   272  	defer pool.mtx.Unlock()
   273  	return pool.maxPeerHeight
   274  }
   275  
   276  // SetPeerHeight sets the peer's alleged blockchain height.
   277  func (pool *BlockPool) SetPeerHeight(peerID p2p.ID, height int64) {
   278  	pool.mtx.Lock()
   279  	defer pool.mtx.Unlock()
   280  
   281  	peer := pool.peers[peerID]
   282  	if peer != nil {
   283  		peer.height = height
   284  	} else {
   285  		peer = newBPPeer(pool, peerID, height)
   286  		peer.setLogger(pool.Logger.With("peer", peerID))
   287  		pool.peers[peerID] = peer
   288  	}
   289  
   290  	if height > pool.maxPeerHeight {
   291  		pool.maxPeerHeight = height
   292  	}
   293  }
   294  
   295  // RemovePeer removes the peer with peerID from the pool. If there's no peer
   296  // with peerID, function is a no-op.
   297  func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
   298  	pool.mtx.Lock()
   299  	defer pool.mtx.Unlock()
   300  
   301  	pool.removePeer(peerID)
   302  }
   303  
   304  func (pool *BlockPool) removePeer(peerID p2p.ID) {
   305  	for _, requester := range pool.requesters {
   306  		if requester.getPeerID() == peerID {
   307  			requester.redo(peerID)
   308  		}
   309  	}
   310  
   311  	peer, ok := pool.peers[peerID]
   312  	if ok {
   313  		if peer.timeout != nil {
   314  			peer.timeout.Stop()
   315  		}
   316  
   317  		delete(pool.peers, peerID)
   318  
   319  		// Find a new peer with the biggest height and update maxPeerHeight if the
   320  		// peer's height was the biggest.
   321  		if peer.height == pool.maxPeerHeight {
   322  			pool.updateMaxPeerHeight()
   323  		}
   324  	}
   325  }
   326  
   327  // If no peers are left, maxPeerHeight is set to 0.
   328  func (pool *BlockPool) updateMaxPeerHeight() {
   329  	var max int64
   330  	for _, peer := range pool.peers {
   331  		if peer.height > max {
   332  			max = peer.height
   333  		}
   334  	}
   335  	pool.maxPeerHeight = max
   336  }
   337  
   338  // Pick an available peer with at least the given minHeight.
   339  // If no peers are available, returns nil.
   340  func (pool *BlockPool) pickIncrAvailablePeer(minHeight int64) *bpPeer {
   341  	pool.mtx.Lock()
   342  	defer pool.mtx.Unlock()
   343  
   344  	for _, peer := range pool.peers {
   345  		if peer.didTimeout {
   346  			pool.removePeer(peer.id)
   347  			continue
   348  		}
   349  		if peer.numPending >= maxPendingRequestsPerPeer {
   350  			continue
   351  		}
   352  		if peer.height < minHeight {
   353  			continue
   354  		}
   355  		peer.incrPending()
   356  		return peer
   357  	}
   358  	return nil
   359  }
   360  
   361  func (pool *BlockPool) makeNextRequester() {
   362  	pool.mtx.Lock()
   363  	defer pool.mtx.Unlock()
   364  
   365  	nextHeight := pool.height + pool.requestersLen()
   366  	if nextHeight > pool.maxPeerHeight {
   367  		return
   368  	}
   369  
   370  	request := newBPRequester(pool, nextHeight)
   371  
   372  	pool.requesters[nextHeight] = request
   373  	atomic.AddInt32(&pool.numPending, 1)
   374  
   375  	err := request.Start()
   376  	if err != nil {
   377  		request.Logger.Error("Error starting request", "err", err)
   378  	}
   379  }
   380  
   381  func (pool *BlockPool) requestersLen() int64 {
   382  	return int64(len(pool.requesters))
   383  }
   384  
   385  func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
   386  	if !pool.IsRunning() {
   387  		return
   388  	}
   389  	pool.requestsCh <- BlockRequest{height, peerID}
   390  }
   391  
   392  func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
   393  	if !pool.IsRunning() {
   394  		return
   395  	}
   396  	pool.errorsCh <- peerError{err, peerID}
   397  }
   398  
   399  // for debugging purposes
   400  //nolint:unused
   401  func (pool *BlockPool) debug() string {
   402  	pool.mtx.Lock()
   403  	defer pool.mtx.Unlock()
   404  
   405  	str := ""
   406  	nextHeight := pool.height + pool.requestersLen()
   407  	for h := pool.height; h < nextHeight; h++ {
   408  		if pool.requesters[h] == nil {
   409  			str += fmt.Sprintf("H(%v):X ", h)
   410  		} else {
   411  			str += fmt.Sprintf("H(%v):", h)
   412  			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
   413  		}
   414  	}
   415  	return str
   416  }
   417  
   418  //-------------------------------------
   419  
// bpPeer is the pool's bookkeeping for a single peer: its reported height,
// how many requests are outstanding to it, and the monitor/timer used to
// detect that it is too slow or silent.
type bpPeer struct {
	pool        *BlockPool    // owning pool; its mtx is taken in onTimeout
	id          p2p.ID
	recvMonitor *flow.Monitor // created in resetMonitor; nil until then

	height     int64       // height reported via SetPeerHeight
	numPending int32       // requests currently outstanding to this peer
	timeout    *time.Timer // created lazily in resetTimeout; nil until then
	didTimeout bool        // set when the peer is judged too slow or silent

	logger log.Logger
}
   432  
   433  func newBPPeer(pool *BlockPool, peerID p2p.ID, height int64) *bpPeer {
   434  	peer := &bpPeer{
   435  		pool:       pool,
   436  		id:         peerID,
   437  		height:     height,
   438  		numPending: 0,
   439  		logger:     log.NewNopLogger(),
   440  	}
   441  	return peer
   442  }
   443  
   444  func (peer *bpPeer) setLogger(l log.Logger) {
   445  	peer.logger = l
   446  }
   447  
   448  func (peer *bpPeer) resetMonitor() {
   449  	peer.recvMonitor = flow.New(time.Second, time.Second*40)
   450  	initialValue := float64(minRecvRate) * math.E
   451  	peer.recvMonitor.SetREMA(initialValue)
   452  }
   453  
   454  func (peer *bpPeer) resetTimeout() {
   455  	if peer.timeout == nil {
   456  		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
   457  	} else {
   458  		peer.timeout.Reset(peerTimeout)
   459  	}
   460  }
   461  
   462  func (peer *bpPeer) incrPending() {
   463  	if peer.numPending == 0 {
   464  		peer.resetMonitor()
   465  		peer.resetTimeout()
   466  	}
   467  	peer.numPending++
   468  }
   469  
   470  func (peer *bpPeer) decrPending(recvSize int) {
   471  	peer.numPending--
   472  	if peer.numPending == 0 {
   473  		peer.timeout.Stop()
   474  	} else {
   475  		peer.recvMonitor.Update(recvSize)
   476  		peer.resetTimeout()
   477  	}
   478  }
   479  
   480  func (peer *bpPeer) onTimeout() {
   481  	peer.pool.mtx.Lock()
   482  	defer peer.pool.mtx.Unlock()
   483  
   484  	err := errors.New("peer did not send us anything")
   485  	peer.pool.sendError(err, peer.id)
   486  	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
   487  	peer.didTimeout = true
   488  }
   489  
   490  //-------------------------------------
   491  
// bpRequester is responsible for obtaining the block at a single height. Its
// requestRoutine picks a peer, sends the request through the pool, and waits
// for the block, a redo, or shutdown.
type bpRequester struct {
	cmn.BaseService
	pool       *BlockPool
	height     int64         // the block height this requester fetches
	gotBlockCh chan struct{} // signalled (non-blocking) by setBlock
	redoCh     chan p2p.ID // redo may be sent multiple times; the peer ID identifies which assignment it refers to so repeats can be ignored

	// mtx guards peerID and block.
	mtx    sync.Mutex
	peerID p2p.ID       // peer currently assigned; "" when none
	block  *types.Block // received block; nil until setBlock succeeds
}
   503  
   504  func newBPRequester(pool *BlockPool, height int64) *bpRequester {
   505  	bpr := &bpRequester{
   506  		pool:       pool,
   507  		height:     height,
   508  		gotBlockCh: make(chan struct{}, 1),
   509  		redoCh:     make(chan p2p.ID, 1),
   510  
   511  		peerID: "",
   512  		block:  nil,
   513  	}
   514  	bpr.BaseService = *cmn.NewBaseService(nil, "bpRequester", bpr)
   515  	return bpr
   516  }
   517  
   518  func (bpr *bpRequester) OnStart() error {
   519  	go bpr.requestRoutine()
   520  	return nil
   521  }
   522  
   523  // Returns true if the peer matches and block doesn't already exist.
   524  func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
   525  	bpr.mtx.Lock()
   526  	if bpr.block != nil || bpr.peerID != peerID {
   527  		bpr.mtx.Unlock()
   528  		return false
   529  	}
   530  	bpr.block = block
   531  	bpr.mtx.Unlock()
   532  
   533  	select {
   534  	case bpr.gotBlockCh <- struct{}{}:
   535  	default:
   536  	}
   537  	return true
   538  }
   539  
   540  func (bpr *bpRequester) getBlock() *types.Block {
   541  	bpr.mtx.Lock()
   542  	defer bpr.mtx.Unlock()
   543  	return bpr.block
   544  }
   545  
   546  func (bpr *bpRequester) getPeerID() p2p.ID {
   547  	bpr.mtx.Lock()
   548  	defer bpr.mtx.Unlock()
   549  	return bpr.peerID
   550  }
   551  
   552  // This is called from the requestRoutine, upon redo().
   553  func (bpr *bpRequester) reset() {
   554  	bpr.mtx.Lock()
   555  	defer bpr.mtx.Unlock()
   556  
   557  	if bpr.block != nil {
   558  		atomic.AddInt32(&bpr.pool.numPending, 1)
   559  	}
   560  
   561  	bpr.peerID = ""
   562  	bpr.block = nil
   563  }
   564  
   565  // Tells bpRequester to pick another peer and try again.
   566  // NOTE: Nonblocking, and does nothing if another redo
   567  // was already requested.
   568  func (bpr *bpRequester) redo(peerId p2p.ID) {
   569  	select {
   570  	case bpr.redoCh <- peerId:
   571  	default:
   572  	}
   573  }
   574  
// requestRoutine is the requester's main loop. Each outer iteration picks an
// available peer, records it as the assigned peer, sends the block request
// through the pool, and then waits. A redo for the currently assigned peer
// resets the requester and starts a new outer iteration.
// Receiving a block does NOT end the routine: it keeps waiting so that a later
// redo can still be honoured. It returns only when the requester or the pool
// is no longer running / has quit.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			// Stop trying as soon as either the requester or the pool is down.
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				// No peer available right now; poll again shortly.
				//log.Info("No peers available", "height", height)
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		// Record the assignment under the lock; setBlock and getPeerID read it.
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				// The pool is shutting down; stop this requester as well.
				bpr.Stop()
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
				// Only honour a redo aimed at the peer we are currently
				// assigned to; anything else is a stale/repeated redo.
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				} else {
					continue WAIT_LOOP
				}
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait til Quit.
				continue WAIT_LOOP
			}
		}
	}
}
   624  
   625  //-------------------------------------
   626  
// BlockRequest is the message the pool publishes on its requests channel: a
// request for the block at Height, addressed to the peer PeerID.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}