github.com/lazyledger/lazyledger-core@v0.35.0-dev.0.20210613111200-4c651f053571/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync/atomic"
	"time"

	flow "github.com/lazyledger/lazyledger-core/libs/flowrate"
	"github.com/lazyledger/lazyledger-core/libs/log"
	"github.com/lazyledger/lazyledger-core/libs/service"
	tmsync "github.com/lazyledger/lazyledger-core/libs/sync"
	"github.com/lazyledger/lazyledger-core/p2p"
	"github.com/lazyledger/lazyledger-core/types"
)

/*
e.g., L = latency = 0.1s
	P = num peers = 10
	FN = num full nodes
	BS = 1kB block size
	CB = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s
	B/S = CB/P/BS = 12.8 blocks/s

	12.8 * 0.1 = 1.28 blocks on conn
*/
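
// The same back-of-envelope estimate as straight-line Go, a hedged sketch
// only (the names below are illustrative, not part of this package):
//
//	latency := 0.1                      // L, seconds
//	peers := 10.0                       // P
//	blockSize := 1024.0                 // BS, bytes
//	bandwidth := 128.0 * 1024.0         // CB, bytes/s (1 Mbit/s)
//	perPeer := bandwidth / peers        // CB/P ≈ 12.8 kB/s
//	blocksPerSec := perPeer / blockSize // B/S ≈ 12.8 blocks/s
//	inFlight := blocksPerSec * latency  // ≈ 1.28 blocks on the connection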

const (
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across the Atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
	Peers self-report their heights when we join the block pool.
	Starting from our latest pool.height, we request blocks
	in sequence from peers that reported higher heights than ours.
	Every so often we ask peers what height they're on so we can keep going.

	Requests are continuously made for blocks of higher heights until
	the limit is reached. If most of the requests have no available peers, and we
	are not at peer limits, we can probably switch to the consensus reactor.
*/
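
// A hedged sketch, for illustration only, of the loop a reactor might run to
// service the pool's two outbound channels. sendBlockRequest and
// stopPeerForError are hypothetical stand-ins for the reactor's own logic:
//
//	go func() {
//		for {
//			select {
//			case req := <-requestsCh:
//				// Ask the chosen peer for the block at req.Height.
//				sendBlockRequest(req.PeerID, req.Height)
//			case perr := <-errorsCh:
//				// Disconnect or otherwise penalize the offending peer.
//				stopPeerForError(perr)
//			}
//		}
//	}()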

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	lastAdvance time.Time

	mtx tmsync.RWMutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[p2p.ID]*bpPeer),

		requesters: make(map[int64]*bpRequester),
		height:     start,
		numPending: 0,

		requestsCh: requestsCh,
		errorsCh:   errorsCh,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}
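
// A minimal construction sketch; the channel capacities below are
// illustrative assumptions, not prescribed by this package:
//
//	requestsCh := make(chan BlockRequest, maxTotalRequesters)
//	errorsCh := make(chan peerError, maxTotalRequesters)
//	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
//	if err := pool.Start(); err != nil {
//		// handle the startup error
//	}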

// OnStart implements service.Service by spawning the requesters routine and
// recording the pool's start time.
func (pool *BlockPool) OnStart() error {
	pool.lastAdvance = time.Now()
	go pool.makeRequestersRoutine()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			break
		}

		_, numPending, lenRequesters := pool.GetStatus()
		switch {
		case numPending >= maxPendingRequests:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		case lenRequesters >= maxTotalRequesters:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		default:
			// request more blocks.
			pool.makeNextRequester()
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		// check if peer timed out
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}
		}

		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}
}

// GetStatus returns the pool's height, the number of pending requests and the
// number of requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		return false
	}
	// NOTE: we use maxPeerHeight - 1 because syncing block H requires block
	// H+1 to verify the LastCommit.
	return pool.height >= (pool.maxPeerHeight - 1)
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}

// PopRequest pops the first block at pool.height.
// It must have been validated by the second block's Commit from PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		if err := r.Stop(); err != nil {
			pool.Logger.Error("Error stopping requester", "err", err)
		}
		delete(pool.requesters, pool.height)
		pool.height++
		pool.lastAdvance = time.Now()
	} else {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}
}

// RedoRequest invalidates the block at the given height, removes the peer
// that delivered it, and redoes the request from other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.getPeerID()
	if peerID != p2p.ID("") {
		// RemovePeer will redo all requesters associated with this peer.
		pool.removePeer(peerID)
	}
	return peerID
}
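
// A hedged sketch of the intended consumption pattern: peek two blocks,
// verify the first against the second's LastCommit, then pop. verifyFirst is
// a hypothetical callback standing in for the caller's validation logic:
//
//	for pool.IsRunning() {
//		first, second := pool.PeekTwoBlocks()
//		if first == nil || second == nil {
//			time.Sleep(requestIntervalMS * time.Millisecond)
//			continue
//		}
//		if err := verifyFirst(first, second); err != nil {
//			// Verification failed: drop the peer that served the first
//			// block and let its requester fetch from someone else.
//			peerID := pool.RedoRequest(first.Height)
//			pool.sendError(err, peerID)
//			continue
//		}
//		pool.PopRequest()
//	}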

// AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	requester := pool.requesters[block.Height]
	if requester == nil {
		pool.Logger.Error("peer sent us a block we didn't expect",
			"peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
		diff := pool.height - block.Height
		if diff < 0 {
			diff *= -1
		}
		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
		}
		return
	}

	if requester.setBlock(block, peerID) {
		atomic.AddInt32(&pool.numPending, -1)
		peer := pool.peers[peerID]
		if peer != nil {
			peer.decrPending(blockSize)
		}
	} else {
		err := errors.New("requester is different or block already exists")
		pool.Logger.Error(err.Error(), "peer", peerID, "requester", requester.getPeerID(), "blockHeight", block.Height)
		pool.sendError(err, peerID)
	}
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()
	return pool.maxPeerHeight
}

// LastAdvance returns the time when the last block was processed (or start
// time if no blocks were processed).
func (pool *BlockPool) LastAdvance() time.Time {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()
	return pool.lastAdvance
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}

// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, the function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
	for _, requester := range pool.requesters {
		if requester.getPeerID() == peerID {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)

		// Find a new peer with the biggest height and update maxPeerHeight if
		// the removed peer's height was the biggest.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Picks an available peer that has the given height.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}
	return nil
}

func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	nextHeight := pool.height + pool.requestersLen()
	if nextHeight > pool.maxPeerHeight {
		return
	}

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	err := request.Start()
	if err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) requestersLen() int64 {
	return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + pool.requestersLen()
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
		}
	}
	return str
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          p2p.ID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}

func (peer *bpPeer) resetMonitor() {
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
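	// Seed the rate estimator above minRecvRate (by a factor of e) so a
	// freshly reset monitor isn't judged too slow before real samples arrive.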
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		peer.timeout.Stop()
	} else {
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}

//-------------------------------------

type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64
	gotBlockCh chan struct{}
	redoCh     chan p2p.ID // redo may be sent multiple times; the peer ID identifies duplicates

	mtx    tmsync.Mutex
	peerID p2p.ID
	block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
	bpr := &bpRequester{
		pool:       pool,
		height:     height,
		gotBlockCh: make(chan struct{}, 1),
		redoCh:     make(chan p2p.ID, 1),

		peerID: "",
		block:  nil,
	}
	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
	return bpr
}

func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}

// Returns true if the peer matches and the block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
	bpr.mtx.Lock()
	if bpr.block != nil || bpr.peerID != peerID {
		bpr.mtx.Unlock()
		return false
	}
	bpr.block = block
	bpr.mtx.Unlock()

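	// Non-blocking notify: gotBlockCh has capacity 1, so a single pending
	// signal is enough to wake the request routine; extras are dropped.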
	select {
	case bpr.gotBlockCh <- struct{}{}:
	default:
	}
	return true
}

func (bpr *bpRequester) getBlock() *types.Block {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.block
}

func (bpr *bpRequester) getPeerID() p2p.ID {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	if bpr.block != nil {
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	bpr.peerID = ""
	bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Non-blocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}

// Responsible for making more requests as necessary.
// Keeps requesting until a block is received (via AddBlock → setBlock), then
// waits for a quit signal or a redo; it returns only when the requester or
// the pool is stopped.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send the request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
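				// Only honor a redo aimed at the peer we actually requested
				// from; stale redos for an earlier peer are ignored.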
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				} else {
					continue WAIT_LOOP
				}
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait until Quit.
				continue WAIT_LOOP
			}
		}
	}
}