github.com/vipernet-xyz/tm@v0.34.24/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync/atomic"
	"time"

	flow "github.com/vipernet-xyz/tm/libs/flowrate"
	"github.com/vipernet-xyz/tm/libs/log"
	"github.com/vipernet-xyz/tm/libs/service"
	tmsync "github.com/vipernet-xyz/tm/libs/sync"
	"github.com/vipernet-xyz/tm/p2p"
	"github.com/vipernet-xyz/tm/types"
)
/*
e.g., L = latency = 0.1s
	P = num peers = 10
	FN = num full nodes
	BS = 1 kB block size
	CB = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s
	B/S = CB/P/BS = 12.8 blocks/s

	12.8 * 0.1 = 1.28 blocks on conn
*/
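
/*
A minimal worked version of the arithmetic above (a sketch; the values are the
example's assumptions, not measurements):

	latency := 0.1                         // L, seconds
	bandwidth := 128.0                     // CB, kB/s (1 Mbit/s)
	numPeers := 10.0                       // P
	blockSize := 1.0                       // BS, kB
	perPeer := bandwidth / numPeers        // CB/P = 12.8 kB/s
	blocksPerSec := perPeer / blockSize    // B/S = 12.8 blocks/s
	blocksOnConn := blocksPerSec * latency // ~1.28 blocks in flight per conn
*/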

const (
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20
	requestRetrySeconds       = 30

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) of 128 Kbps (upload) ~ 15 KB/s,
	// sending data across the Atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
	Peers self-report their heights when we join the block pool.
	Starting from our latest pool.height, we request blocks
	in sequence from peers that reported higher heights than ours.
	Every so often we ask peers what height they're on so we can keep going.

	Requests are continuously made for blocks of higher heights until
	the limit is reached. If most of the requests have no available peers, and we
	are not at peer limits, we can probably switch to the consensus reactor.
*/
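
/*
A rough usage sketch, assuming the surrounding reactor wires up the channels
(the channel capacities and the startHeight/peerID/base/height variables here
are illustrative, not values defined by this package):

	requestsCh := make(chan BlockRequest, maxTotalRequesters)
	errorsCh := make(chan peerError, 1000)
	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
	if err := pool.Start(); err != nil {
		// handle the error
	}
	pool.SetPeerRange(peerID, base, height) // on each peer status message
	go func() {
		for req := range requestsCh {
			// send a block request for req.Height to req.PeerID over p2p
		}
	}()
*/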

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	startTime time.Time

	mtx tmsync.Mutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[p2p.ID]*bpPeer),

		requesters: make(map[int64]*bpRequester),
		height:     start,
		numPending: 0,

		requestsCh: requestsCh,
		errorsCh:   errorsCh,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}

// OnStart implements service.Service by spawning the requesters routine and
// recording the pool's start time.
func (pool *BlockPool) OnStart() error {
	go pool.makeRequestersRoutine()
	pool.startTime = time.Now()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			break
		}

		_, numPending, lenRequesters := pool.GetStatus()
		switch {
		case numPending >= maxPendingRequests || lenRequesters >= maxTotalRequesters:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		default:
			// request for more blocks.
			pool.makeNextRequester()
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}
		}
		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}
}

// GetStatus returns pool's height, numPending requests and the number of
// requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
// TODO: relax conditions, prevent abuse.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		pool.Logger.Debug("Blockpool has no peers")
		return false
	}

	// Some conditions to determine if we're caught up.
	// Ensures we've either received a block or waited some amount of time,
	// and that we're synced to the highest known height.
	// Note we use maxPeerHeight - 1 because syncing block H requires block H+1
	// to verify the LastCommit. For example, with maxPeerHeight = 100 we are
	// caught up once pool.height reaches 99.
	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
	return isCaughtUp
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}

// PopRequest pops the first block at pool.height.
// It must have been validated by 'second'.Commit from PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		/*  The block can disappear at any time, due to removePeer().
		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
			PanicSanity("PopRequest() requires a valid block")
		}
		*/
		if err := r.Stop(); err != nil {
			pool.Logger.Error("Error stopping requester", "err", err)
		}
		delete(pool.requesters, pool.height)
		pool.height++
	} else {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}
}
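
/*
A hedged sketch of the intended consumer loop around PeekTwoBlocks/PopRequest
(the verification step belongs to the caller and is elided):

	first, second := pool.PeekTwoBlocks()
	if first != nil && second != nil {
		// verify first using second.LastCommit; if it checks out:
		pool.PopRequest()
		// if verification fails instead, drop the peer and refetch:
		// peerID := pool.RedoRequest(first.Height)
	}
*/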

// RedoRequest invalidates the block at the given height, removes the peer
// responsible for delivering it, and redoes the request from other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.getPeerID()
	if peerID != p2p.ID("") {
		// RemovePeer will redo all requesters associated with this peer.
		pool.removePeer(peerID)
	}
	return peerID
}

// AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	requester := pool.requesters[block.Height]
	if requester == nil {
		pool.Logger.Info("peer sent us a block we didn't expect",
			"peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
		diff := pool.height - block.Height
		if diff < 0 {
			diff *= -1
		}
		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
		}
		return
	}

	if requester.setBlock(block, peerID) {
		atomic.AddInt32(&pool.numPending, -1)
		peer := pool.peers[peerID]
		if peer != nil {
			peer.decrPending(blockSize)
		}
	} else {
		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
		pool.sendError(errors.New("invalid peer"), peerID)
	}
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()
	return pool.maxPeerHeight
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}

// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, the function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
	for _, requester := range pool.requesters {
		if requester.getPeerID() == peerID {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)

		// Find a new peer with the biggest height and update maxPeerHeight if the
		// removed peer's height was the biggest.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Pick an available peer that can provide the block at the given height.
// If no peer is available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}
	return nil
}

func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	nextHeight := pool.height + pool.requestersLen()
	if nextHeight > pool.maxPeerHeight {
		return
	}

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	err := request.Start()
	if err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) requestersLen() int64 {
	return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + pool.requestersLen()
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
		}
	}
	return str
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          p2p.ID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}

func (peer *bpPeer) resetMonitor() {
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
	// Seed the rate's moving average above minRecvRate so a fresh peer is not
	// flagged as too slow before it has had a chance to send anything.
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		peer.timeout.Stop()
	} else {
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}

//-------------------------------------

type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64
	gotBlockCh chan struct{}
	redoCh     chan p2p.ID // redo may be signalled multiple times; the peer ID identifies which request to repeat

	mtx    tmsync.Mutex
	peerID p2p.ID
	block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
	bpr := &bpRequester{
		pool:       pool,
		height:     height,
		gotBlockCh: make(chan struct{}, 1),
		redoCh:     make(chan p2p.ID, 1),

		peerID: "",
		block:  nil,
	}
	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
	return bpr
}

func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}

// Returns true if the peer matches and block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
	bpr.mtx.Lock()
	if bpr.block != nil || bpr.peerID != peerID {
		bpr.mtx.Unlock()
		return false
	}
	bpr.block = block
	bpr.mtx.Unlock()

	select {
	case bpr.gotBlockCh <- struct{}{}:
	default:
	}
	return true
}

func (bpr *bpRequester) getBlock() *types.Block {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.block
}

func (bpr *bpRequester) getPeerID() p2p.ID {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	if bpr.block != nil {
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	bpr.peerID = ""
	bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}

// Responsible for picking a peer, sending the block request, and retrying or
// redoing the request as necessary. Returns only when the requester or the
// pool is stopped.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		to := time.NewTimer(requestRetrySeconds * time.Second)
		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case <-to.C:
				bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID)
				// Simulate a redo.
				bpr.reset()
				continue OUTER_LOOP
			case peerID := <-bpr.redoCh:
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				} else {
					continue WAIT_LOOP
				}
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait until Quit.
				continue WAIT_LOOP
			}
		}
	}
}

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}
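
// Note: each BlockRequest emitted on the pool's requestsCh pairs the height
// being fetched with the peer chosen by pickIncrAvailablePeer (see
// sendRequest); the surrounding reactor is expected to translate it into a
// p2p block-request message to that peer.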