github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/tm2/pkg/bft/blockchain/pool.go (about)

     1  package blockchain
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"log/slog"
     7  	"math"
     8  	"sync"
     9  	"sync/atomic"
    10  	"time"
    11  
    12  	"github.com/gnolang/gno/tm2/pkg/bft/types"
    13  	"github.com/gnolang/gno/tm2/pkg/flow"
    14  	"github.com/gnolang/gno/tm2/pkg/log"
    15  	"github.com/gnolang/gno/tm2/pkg/p2p"
    16  	"github.com/gnolang/gno/tm2/pkg/service"
    17  )
    18  
/*
eg, L = latency = 0.1s
	P = num peers = 10
	FN = num full nodes
	BS = 1kB block size
	CB = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s
	B/S = CB/P/BS = 12.8 blocks/s

	12.8 blocks/s * 0.1s = 1.28 blocks on conn
*/
    30  
    31  const (
    32  	requestIntervalMS         = 2
    33  	maxTotalRequesters        = 600
    34  	maxPendingRequests        = maxTotalRequesters
    35  	maxPendingRequestsPerPeer = 20
    36  
    37  	// Minimum recv rate to ensure we're receiving blocks from a peer fast
    38  	// enough. If a peer is not sending us data at at least that rate, we
    39  	// consider them to have timedout and we disconnect.
    40  	//
    41  	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
    42  	// sending data across atlantic ~ 7.5 KB/s.
    43  	minRecvRate = 7680
    44  
    45  	// Maximum difference between current and new block's height.
    46  	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
    47  )
    48  
    49  var peerTimeout = 15 * time.Second // not const so we can override with tests
    50  
    51  /*
    52  	Peers self report their heights when we join the block pool.
    53  	Starting from our latest pool.height, we request blocks
    54  	in sequence from peers that reported higher heights than ours.
    55  	Every so often we ask peers what height they're on so we can keep going.
    56  
    57  	Requests are continuously made for blocks of higher heights until
    58  	the limit is reached. If most of the requests have no available peers, and we
    59  	are not at peer limits, we can probably switch to consensus reactor
    60  */
    61  
// BlockPool keeps track of the fast sync peers, block requests and block responses.
//
// The requesters, height, peers and maxPeerHeight fields are accessed with mtx
// held; numPending is updated through sync/atomic.
type BlockPool struct {
	service.BaseService
	// startTime is recorded in OnStart and consulted by IsCaughtUp.
	startTime time.Time

	mtx sync.Mutex
	// block requests
	requesters map[int64]*bpRequester // keyed by block height
	height     int64                  // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	// Send-only channels supplied to NewBlockPool; block requests and peer
	// errors are published on them.
	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}
    81  
    82  // NewBlockPool returns a new BlockPool with the height equal to start. Block
    83  // requests and errors will be sent to requestsCh and errorsCh accordingly.
    84  func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    85  	bp := &BlockPool{
    86  		peers: make(map[p2p.ID]*bpPeer),
    87  
    88  		requesters: make(map[int64]*bpRequester),
    89  		height:     start,
    90  		numPending: 0,
    91  
    92  		requestsCh: requestsCh,
    93  		errorsCh:   errorsCh,
    94  	}
    95  	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
    96  	return bp
    97  }
    98  
// OnStart implements service.Service by spawning requesters routine and recording
// pool's start time.
//
// It always returns nil.
func (pool *BlockPool) OnStart() error {
	// Runs until the pool stops reporting IsRunning.
	go pool.makeRequestersRoutine()
	// Read later by IsCaughtUp to decide whether enough time has elapsed.
	pool.startTime = time.Now()
	return nil
}
   106  
   107  // spawns requesters as needed
   108  func (pool *BlockPool) makeRequestersRoutine() {
   109  	for {
   110  		if !pool.IsRunning() {
   111  			break
   112  		}
   113  
   114  		_, numPending, lenRequesters := pool.GetStatus()
   115  		switch {
   116  		case numPending >= maxPendingRequests:
   117  			// sleep for a bit.
   118  			time.Sleep(requestIntervalMS * time.Millisecond)
   119  			// check for timed out peers
   120  			pool.removeTimedoutPeers()
   121  		case lenRequesters >= maxTotalRequesters:
   122  			// sleep for a bit.
   123  			time.Sleep(requestIntervalMS * time.Millisecond)
   124  			// check for timed out peers
   125  			pool.removeTimedoutPeers()
   126  		default:
   127  			// request for more blocks.
   128  			pool.makeNextRequester()
   129  		}
   130  	}
   131  }
   132  
   133  func (pool *BlockPool) removeTimedoutPeers() {
   134  	pool.mtx.Lock()
   135  	defer pool.mtx.Unlock()
   136  
   137  	for _, peer := range pool.peers {
   138  		if !peer.didTimeout && peer.numPending > 0 {
   139  			curRate := peer.recvMonitor.Status().CurRate
   140  			// curRate can be 0 on start
   141  			if curRate != 0 && curRate < minRecvRate {
   142  				err := errors.New("peer is not sending us data fast enough")
   143  				pool.sendError(err, peer.id)
   144  				pool.Logger.Error("SendTimeout", "peer", peer.id,
   145  					"reason", err,
   146  					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
   147  					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
   148  				peer.didTimeout = true
   149  			}
   150  		}
   151  		if peer.didTimeout {
   152  			pool.removePeer(peer.id)
   153  		}
   154  	}
   155  }
   156  
   157  // GetStatus returns pool's height, numPending requests and the number of
   158  // requesters.
   159  func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
   160  	pool.mtx.Lock()
   161  	defer pool.mtx.Unlock()
   162  
   163  	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
   164  }
   165  
   166  // IsCaughtUp returns true if this node is caught up, false - otherwise.
   167  // TODO: relax conditions, prevent abuse.
   168  func (pool *BlockPool) IsCaughtUp() bool {
   169  	pool.mtx.Lock()
   170  	defer pool.mtx.Unlock()
   171  
   172  	// Need at least 1 peer to be considered caught up.
   173  	if len(pool.peers) == 0 {
   174  		pool.Logger.Debug("Blockpool has no peers")
   175  		return false
   176  	}
   177  
   178  	// Some conditions to determine if we're caught up.
   179  	// Ensures we've either received a block or waited some amount of time,
   180  	// and that we're synced to the highest known height.
   181  	// Note we use maxPeerHeight - 1 because to sync block H requires block H+1
   182  	// to verify the LastCommit.
   183  	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
   184  	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
   185  	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
   186  	return isCaughtUp
   187  }
   188  
   189  // PeekTwoBlocks returns blocks at pool.height and pool.height+1.
   190  // We need to see the second block's Commit to validate the first block.
   191  // So we peek two blocks at a time.
   192  // The caller will verify the commit.
   193  func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
   194  	pool.mtx.Lock()
   195  	defer pool.mtx.Unlock()
   196  
   197  	if r := pool.requesters[pool.height]; r != nil {
   198  		first = r.getBlock()
   199  	}
   200  	if r := pool.requesters[pool.height+1]; r != nil {
   201  		second = r.getBlock()
   202  	}
   203  	return
   204  }
   205  
   206  // PopRequest pops the first block at pool.height.
   207  // It must have been validated by 'second'.Commit from PeekTwoBlocks().
   208  func (pool *BlockPool) PopRequest() {
   209  	pool.mtx.Lock()
   210  	defer pool.mtx.Unlock()
   211  
   212  	if r := pool.requesters[pool.height]; r != nil {
   213  		/*  The block can disappear at any time, due to removePeer().
   214  		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
   215  			PanicSanity("PopRequest() requires a valid block")
   216  		}
   217  		*/
   218  		r.Stop()
   219  		delete(pool.requesters, pool.height)
   220  		pool.height++
   221  	} else {
   222  		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
   223  	}
   224  }
   225  
   226  // RedoRequest invalidates the block at pool.height,
   227  // Remove the peer and redo request from others.
   228  // Returns the ID of the removed peer.
   229  func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
   230  	pool.mtx.Lock()
   231  	defer pool.mtx.Unlock()
   232  
   233  	request := pool.requesters[height]
   234  	peerID := request.getPeerID()
   235  	if peerID != p2p.ID("") {
   236  		// RemovePeer will redo all requesters associated with this peer.
   237  		pool.removePeer(peerID)
   238  	}
   239  	return peerID
   240  }
   241  
   242  // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it.
   243  // TODO: ensure that blocks come in order for each peer.
   244  func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
   245  	pool.mtx.Lock()
   246  	defer pool.mtx.Unlock()
   247  
   248  	requester := pool.requesters[block.Height]
   249  	if requester == nil {
   250  		pool.Logger.Info("peer sent us a block we didn't expect", "peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
   251  		diff := pool.height - block.Height
   252  		if diff < 0 {
   253  			diff *= -1
   254  		}
   255  		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
   256  			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
   257  		}
   258  		return
   259  	}
   260  
   261  	if requester.setBlock(block, peerID) {
   262  		atomic.AddInt32(&pool.numPending, -1)
   263  		peer := pool.peers[peerID]
   264  		if peer != nil {
   265  			peer.decrPending(blockSize)
   266  		}
   267  	} else {
   268  		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
   269  		pool.sendError(errors.New("invalid peer"), peerID)
   270  	}
   271  }
   272  
   273  // MaxPeerHeight returns the highest reported height.
   274  func (pool *BlockPool) MaxPeerHeight() int64 {
   275  	pool.mtx.Lock()
   276  	defer pool.mtx.Unlock()
   277  	return pool.maxPeerHeight
   278  }
   279  
   280  // SetPeerHeight sets the peer's alleged blockchain height.
   281  func (pool *BlockPool) SetPeerHeight(peerID p2p.ID, height int64) {
   282  	pool.mtx.Lock()
   283  	defer pool.mtx.Unlock()
   284  
   285  	peer := pool.peers[peerID]
   286  	if peer != nil {
   287  		peer.height = height
   288  	} else {
   289  		peer = newBPPeer(pool, peerID, height)
   290  		peer.setLogger(pool.Logger.With("peer", peerID))
   291  		pool.peers[peerID] = peer
   292  	}
   293  
   294  	if height > pool.maxPeerHeight {
   295  		pool.maxPeerHeight = height
   296  	}
   297  }
   298  
// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, function is a no-op.
//
// It is the locking wrapper around removePeer: it takes the pool mutex and
// delegates.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}
   307  
   308  func (pool *BlockPool) removePeer(peerID p2p.ID) {
   309  	for _, requester := range pool.requesters {
   310  		if requester.getPeerID() == peerID {
   311  			requester.redo(peerID)
   312  		}
   313  	}
   314  
   315  	peer, ok := pool.peers[peerID]
   316  	if ok {
   317  		if peer.timeout != nil {
   318  			peer.timeout.Stop()
   319  		}
   320  
   321  		delete(pool.peers, peerID)
   322  
   323  		// Find a new peer with the biggest height and update maxPeerHeight if the
   324  		// peer's height was the biggest.
   325  		if peer.height == pool.maxPeerHeight {
   326  			pool.updateMaxPeerHeight()
   327  		}
   328  	}
   329  }
   330  
   331  // If no peers are left, maxPeerHeight is set to 0.
   332  func (pool *BlockPool) updateMaxPeerHeight() {
   333  	var max int64
   334  	for _, peer := range pool.peers {
   335  		if peer.height > max {
   336  			max = peer.height
   337  		}
   338  	}
   339  	pool.maxPeerHeight = max
   340  }
   341  
   342  // Pick an available peer with at least the given minHeight.
   343  // If no peers are available, returns nil.
   344  func (pool *BlockPool) pickIncrAvailablePeer(minHeight int64) *bpPeer {
   345  	pool.mtx.Lock()
   346  	defer pool.mtx.Unlock()
   347  
   348  	for _, peer := range pool.peers {
   349  		if peer.didTimeout {
   350  			pool.removePeer(peer.id)
   351  			continue
   352  		}
   353  		if peer.numPending >= maxPendingRequestsPerPeer {
   354  			continue
   355  		}
   356  		if peer.height < minHeight {
   357  			continue
   358  		}
   359  		peer.incrPending()
   360  		return peer
   361  	}
   362  	return nil
   363  }
   364  
   365  func (pool *BlockPool) makeNextRequester() {
   366  	pool.mtx.Lock()
   367  	defer pool.mtx.Unlock()
   368  
   369  	nextHeight := pool.height + pool.requestersLen()
   370  	if nextHeight > pool.maxPeerHeight {
   371  		return
   372  	}
   373  
   374  	request := newBPRequester(pool, nextHeight)
   375  
   376  	pool.requesters[nextHeight] = request
   377  	atomic.AddInt32(&pool.numPending, 1)
   378  
   379  	err := request.Start()
   380  	if err != nil {
   381  		request.Logger.Error("Error starting request", "err", err)
   382  	}
   383  }
   384  
   385  func (pool *BlockPool) requestersLen() int64 {
   386  	return int64(len(pool.requesters))
   387  }
   388  
   389  func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
   390  	if !pool.IsRunning() {
   391  		return
   392  	}
   393  	pool.requestsCh <- BlockRequest{height, peerID}
   394  }
   395  
   396  func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
   397  	if !pool.IsRunning() {
   398  		return
   399  	}
   400  	pool.errorsCh <- peerError{err, peerID}
   401  }
   402  
   403  // for debugging purposes
   404  //
   405  //nolint:unused
   406  func (pool *BlockPool) debug() string {
   407  	pool.mtx.Lock()
   408  	defer pool.mtx.Unlock()
   409  
   410  	str := ""
   411  	nextHeight := pool.height + pool.requestersLen()
   412  	for h := pool.height; h < nextHeight; h++ {
   413  		if pool.requesters[h] == nil {
   414  			str += fmt.Sprintf("H(%v):X ", h)
   415  		} else {
   416  			str += fmt.Sprintf("H(%v):", h)
   417  			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
   418  		}
   419  	}
   420  	return str
   421  }
   422  
   423  // -------------------------------------
   424  
// bpPeer is the pool's bookkeeping record for one fast-sync peer.
type bpPeer struct {
	pool        *BlockPool    // owning pool; its mutex is taken in onTimeout
	id          p2p.ID        // peer identifier
	recvMonitor *flow.Monitor // receive-rate monitor, recreated by resetMonitor

	height     int64       // height reported by the peer
	numPending int32       // requests assigned to this peer and not yet answered
	timeout    *time.Timer // inactivity timer; nil until resetTimeout first runs
	didTimeout bool        // set once the peer was judged too slow or silent

	logger *slog.Logger
}
   437  
   438  func newBPPeer(pool *BlockPool, peerID p2p.ID, height int64) *bpPeer {
   439  	peer := &bpPeer{
   440  		pool:       pool,
   441  		id:         peerID,
   442  		height:     height,
   443  		numPending: 0,
   444  		logger:     log.NewNoopLogger(),
   445  	}
   446  	return peer
   447  }
   448  
// setLogger replaces the peer's logger with l.
func (peer *bpPeer) setLogger(l *slog.Logger) {
	peer.logger = l
}
   452  
   453  func (peer *bpPeer) resetMonitor() {
   454  	peer.recvMonitor = flow.New(time.Second, time.Second*40)
   455  	initialValue := float64(minRecvRate) * math.E
   456  	peer.recvMonitor.SetREMA(initialValue)
   457  }
   458  
   459  func (peer *bpPeer) resetTimeout() {
   460  	if peer.timeout == nil {
   461  		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
   462  	} else {
   463  		peer.timeout.Reset(peerTimeout)
   464  	}
   465  }
   466  
   467  func (peer *bpPeer) incrPending() {
   468  	if peer.numPending == 0 {
   469  		peer.resetMonitor()
   470  		peer.resetTimeout()
   471  	}
   472  	peer.numPending++
   473  }
   474  
   475  func (peer *bpPeer) decrPending(recvSize int) {
   476  	peer.numPending--
   477  	if peer.numPending == 0 {
   478  		peer.timeout.Stop()
   479  	} else {
   480  		peer.recvMonitor.Update(recvSize)
   481  		peer.resetTimeout()
   482  	}
   483  }
   484  
   485  func (peer *bpPeer) onTimeout() {
   486  	peer.pool.mtx.Lock()
   487  	defer peer.pool.mtx.Unlock()
   488  
   489  	err := errors.New("peer did not send us anything")
   490  	peer.pool.sendError(err, peer.id)
   491  	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
   492  	peer.didTimeout = true
   493  }
   494  
   495  // -------------------------------------
   496  
// bpRequester is responsible for obtaining the block at one height: it picks a
// peer, issues the request and holds the block once it arrives.
type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64         // height of the block this requester fetches
	gotBlockCh chan struct{} // signalled (non-blocking) when a block is stored
	// redoCh carries redo requests. A redo may be sent several times, so each
	// one carries the peer ID it refers to; that lets the request routine
	// ignore redos that do not match the currently assigned peer.
	redoCh chan p2p.ID

	mtx    sync.Mutex   // guards peerID and block
	peerID p2p.ID       // peer currently assigned to this request; "" if none
	block  *types.Block // received block; nil until setBlock succeeds
}
   508  
   509  func newBPRequester(pool *BlockPool, height int64) *bpRequester {
   510  	bpr := &bpRequester{
   511  		pool:       pool,
   512  		height:     height,
   513  		gotBlockCh: make(chan struct{}, 1),
   514  		redoCh:     make(chan p2p.ID, 1),
   515  
   516  		peerID: "",
   517  		block:  nil,
   518  	}
   519  	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
   520  	return bpr
   521  }
   522  
// OnStart launches the request routine in its own goroutine. It always
// returns nil.
func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}
   527  
   528  // Returns true if the peer matches and block doesn't already exist.
   529  func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
   530  	bpr.mtx.Lock()
   531  	if bpr.block != nil || bpr.peerID != peerID {
   532  		bpr.mtx.Unlock()
   533  		return false
   534  	}
   535  	bpr.block = block
   536  	bpr.mtx.Unlock()
   537  
   538  	select {
   539  	case bpr.gotBlockCh <- struct{}{}:
   540  	default:
   541  	}
   542  	return true
   543  }
   544  
   545  func (bpr *bpRequester) getBlock() *types.Block {
   546  	bpr.mtx.Lock()
   547  	defer bpr.mtx.Unlock()
   548  	return bpr.block
   549  }
   550  
   551  func (bpr *bpRequester) getPeerID() p2p.ID {
   552  	bpr.mtx.Lock()
   553  	defer bpr.mtx.Unlock()
   554  	return bpr.peerID
   555  }
   556  
   557  // This is called from the requestRoutine, upon redo().
   558  func (bpr *bpRequester) reset() {
   559  	bpr.mtx.Lock()
   560  	defer bpr.mtx.Unlock()
   561  
   562  	if bpr.block != nil {
   563  		atomic.AddInt32(&bpr.pool.numPending, 1)
   564  	}
   565  
   566  	bpr.peerID = ""
   567  	bpr.block = nil
   568  }
   569  
// Tells bpRequester to pick another peer and try again.
// peerID identifies the peer the redo refers to; the request routine acts on
// the redo only when it matches the peer it is currently assigned to.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
		// redoCh (buffer of one) is full: a redo is already queued.
	}
}
   579  
   580  // Responsible for making more requests as necessary
   581  // Returns only when a block is found (e.g. AddBlock() is called)
   582  func (bpr *bpRequester) requestRoutine() {
   583  OUTER_LOOP:
   584  	for {
   585  		// Pick a peer to send request to.
   586  		var peer *bpPeer
   587  	PICK_PEER_LOOP:
   588  		for {
   589  			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
   590  				return
   591  			}
   592  			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
   593  			if peer == nil {
   594  				// log.Info("No peers available", "height", height)
   595  				time.Sleep(requestIntervalMS * time.Millisecond)
   596  				continue PICK_PEER_LOOP
   597  			}
   598  			break PICK_PEER_LOOP
   599  		}
   600  		bpr.mtx.Lock()
   601  		bpr.peerID = peer.id
   602  		bpr.mtx.Unlock()
   603  
   604  		// Send request and wait.
   605  		bpr.pool.sendRequest(bpr.height, peer.id)
   606  	WAIT_LOOP:
   607  		for {
   608  			select {
   609  			case <-bpr.pool.Quit():
   610  				bpr.Stop()
   611  				return
   612  			case <-bpr.Quit():
   613  				return
   614  			case peerID := <-bpr.redoCh:
   615  				if peerID == bpr.peerID {
   616  					bpr.reset()
   617  					continue OUTER_LOOP
   618  				} else {
   619  					continue WAIT_LOOP
   620  				}
   621  			case <-bpr.gotBlockCh:
   622  				// We got a block!
   623  				// Continue the for-loop and wait til Quit.
   624  				continue WAIT_LOOP
   625  			}
   626  		}
   627  	}
   628  }
   629  
// BlockRequest stores a block request identified by the block Height and the PeerID responsible for
// delivering the block
type BlockRequest struct {
	Height int64  // height of the requested block
	PeerID p2p.ID // peer the request is addressed to
}