github.com/number571/tendermint@v0.34.11-gost/internal/blockchain/v0/pool.go

     1  package v0
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"sync/atomic"
     8  	"time"
     9  
    10  	flow "github.com/number571/tendermint/internal/libs/flowrate"
    11  	tmsync "github.com/number571/tendermint/internal/libs/sync"
    12  	"github.com/number571/tendermint/libs/log"
    13  	"github.com/number571/tendermint/libs/service"
    14  	"github.com/number571/tendermint/types"
    15  )
    16  
    17  /*
    18  e.g., L = latency = 0.1s
    19  	P = num peers = 10
    20  	FN = num full nodes
    21  	BS = 1kB block size
    22  	CB = 1 Mbit/s = 128 kB/s
    23  	CB/P = 12.8 kB/s
    24  	B/S = CB/P/BS = 12.8 blocks/s
    25  
    26  	12.8 * 0.1 = 1.28 blocks on conn
    27  */
    28  
    29  const (
    30  	requestIntervalMS         = 2
    31  	maxTotalRequesters        = 600
    32  	maxPeerErrBuffer          = 1000
    33  	maxPendingRequests        = maxTotalRequesters
    34  	maxPendingRequestsPerPeer = 20
    35  
    36  	// Minimum recv rate to ensure we're receiving blocks from a peer fast
    37  	// enough. If a peer is not sending us data at least at that rate, we
    38  	// consider them to have timed out and we disconnect.
    39  	//
    40  	// Assuming a DSL connection (not a good choice), 128 Kbps upload ~ 15 KB/s;
    41  	// sending data across the Atlantic ~ 7.5 KB/s (see the worked numbers below).
    42  	minRecvRate = 7680
    43  
    44  	// Maximum difference between current and new block's height.
    45  	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
    46  )
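
        // Worked numbers for minRecvRate (illustrative): 7680 B/s = 7.5 KB/s, i.e.
        // roughly half of the ~15 KB/s a 128 Kbps upload link provides, so a peer
        // delivering less than that is disconnected by removeTimedoutPeers.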
    47  
    48  var peerTimeout = 15 * time.Second // not const so we can override with tests
    49  
    50  /*
    51  	Peers self-report their heights when we join the block pool.
    52  	Starting from our latest pool.height, we request blocks
    53  	in sequence from peers that reported higher heights than ours.
    54  	Every so often we ask peers what height they're on so we can keep going.
    55  
    56  	Requests are continuously made for blocks of higher heights until
    57  	the limit is reached. If most of the requests have no available peers, and we
    58  	are not at peer limits, we can probably switch to the consensus reactor.
    59  */
    60  
    61  // BlockRequest stores a block request identified by the block Height and the
    62  // PeerID responsible for delivering the block.
    63  type BlockRequest struct {
    64  	Height int64
    65  	PeerID types.NodeID
    66  }
    67  
    68  // BlockPool keeps track of the fast sync peers, block requests and block responses.
    69  type BlockPool struct {
    70  	service.BaseService
    71  	lastAdvance time.Time
    72  
    73  	mtx tmsync.RWMutex
    74  	// block requests
    75  	requesters map[int64]*bpRequester
    76  	height     int64 // the lowest key in requesters.
    77  	// peers
    78  	peers         map[types.NodeID]*bpPeer
    79  	maxPeerHeight int64 // the biggest reported height
    80  
    81  	// atomic
    82  	numPending int32 // number of requests pending assignment or block response
    83  
    84  	requestsCh chan<- BlockRequest
    85  	errorsCh   chan<- peerError
    86  
    87  	startHeight               int64
    88  	lastHundredBlockTimeStamp time.Time
    89  	lastSyncRate              float64
    90  }
    91  
    92  // NewBlockPool returns a new BlockPool with the height equal to start. Block
    93  // requests and errors will be sent to requestsCh and errorsCh, respectively.
    94  func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    95  	bp := &BlockPool{
    96  		peers: make(map[types.NodeID]*bpPeer),
    97  
    98  		requesters:  make(map[int64]*bpRequester),
    99  		height:      start,
   100  		startHeight: start,
   101  		numPending:  0,
   102  
   103  		requestsCh:   requestsCh,
   104  		errorsCh:     errorsCh,
   105  		lastSyncRate: 0,
   106  	}
   107  	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
   108  	return bp
   109  }
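
        // Illustrative sketch (not part of the original file): one way a caller
        // might wire the pool to its channels and start it. The channel sizes and
        // the startHeight variable are assumptions, not values from this package.
        //
        //	requestsCh := make(chan BlockRequest, maxTotalRequesters)
        //	errorsCh := make(chan peerError, maxPeerErrBuffer)
        //	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
        //	if err := pool.Start(); err != nil {
        //		// handle the startup error
        //	}
        //	defer func() { _ = pool.Stop() }()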
   110  
   111  // OnStart implements service.Service by spawning the requesters routine and
   112  // recording the pool's start time.
   113  func (pool *BlockPool) OnStart() error {
   114  	pool.lastAdvance = time.Now()
   115  	pool.lastHundredBlockTimeStamp = pool.lastAdvance
   116  	go pool.makeRequestersRoutine()
   117  	return nil
   118  }
   119  
   120  // spawns requesters as needed
   121  func (pool *BlockPool) makeRequestersRoutine() {
   122  	for {
   123  		if !pool.IsRunning() {
   124  			break
   125  		}
   126  
   127  		_, numPending, lenRequesters := pool.GetStatus()
   128  		switch {
   129  		case numPending >= maxPendingRequests:
   130  			// sleep for a bit.
   131  			time.Sleep(requestIntervalMS * time.Millisecond)
   132  			// check for timed out peers
   133  			pool.removeTimedoutPeers()
   134  		case lenRequesters >= maxTotalRequesters:
   135  			// sleep for a bit.
   136  			time.Sleep(requestIntervalMS * time.Millisecond)
   137  			// check for timed out peers
   138  			pool.removeTimedoutPeers()
   139  		default:
   140  			// request more blocks.
   141  			pool.makeNextRequester()
   142  		}
   143  	}
   144  }
   145  
   146  func (pool *BlockPool) removeTimedoutPeers() {
   147  	pool.mtx.Lock()
   148  	defer pool.mtx.Unlock()
   149  
   150  	for _, peer := range pool.peers {
   151  		// check if peer timed out
   152  		if !peer.didTimeout && peer.numPending > 0 {
   153  			curRate := peer.recvMonitor.Status().CurRate
   154  			// curRate can be 0 on start
   155  			if curRate != 0 && curRate < minRecvRate {
   156  				err := errors.New("peer is not sending us data fast enough")
   157  				pool.sendError(err, peer.id)
   158  				pool.Logger.Error("SendTimeout", "peer", peer.id,
   159  					"reason", err,
   160  					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
   161  					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
   162  				peer.didTimeout = true
   163  			}
   164  		}
   165  
   166  		if peer.didTimeout {
   167  			pool.removePeer(peer.id)
   168  		}
   169  	}
   170  }
   171  
   172  // GetStatus returns pool's height, numPending requests and the number of
   173  // requesters.
   174  func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
   175  	pool.mtx.RLock()
   176  	defer pool.mtx.RUnlock()
   177  
   178  	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
   179  }
   180  
   181  // IsCaughtUp returns true if this node is caught up, false otherwise.
   182  func (pool *BlockPool) IsCaughtUp() bool {
   183  	pool.mtx.RLock()
   184  	defer pool.mtx.RUnlock()
   185  
   186  	// Need at least 1 peer to be considered caught up.
   187  	if len(pool.peers) == 0 {
   188  		return false
   189  	}
   190  
   191  	// NOTE: we use maxPeerHeight - 1 because syncing block H requires block H+1
   192  	// to verify the LastCommit.
   193  	return pool.height >= (pool.maxPeerHeight - 1)
   194  }
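
        // For example (illustrative numbers, not from the source): if maxPeerHeight
        // is 100, the pool reports caught up once pool.height reaches 99, because
        // syncing block 100 would need block 101's LastCommit, which peers may not
        // be able to provide yet.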
   195  
   196  // PeekTwoBlocks returns blocks at pool.height and pool.height+1.
   197  // We need to see the second block's Commit to validate the first block.
   198  // So we peek two blocks at a time.
   199  // The caller will verify the commit.
   200  func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
   201  	pool.mtx.RLock()
   202  	defer pool.mtx.RUnlock()
   203  
   204  	if r := pool.requesters[pool.height]; r != nil {
   205  		first = r.getBlock()
   206  	}
   207  	if r := pool.requesters[pool.height+1]; r != nil {
   208  		second = r.getBlock()
   209  	}
   210  	return
   211  }
   212  
   213  // PopRequest pops the first block at pool.height.
   214  // It must have been validated by 'second'.Commit from PeekTwoBlocks().
   215  func (pool *BlockPool) PopRequest() {
   216  	pool.mtx.Lock()
   217  	defer pool.mtx.Unlock()
   218  
   219  	if r := pool.requesters[pool.height]; r != nil {
   220  		if err := r.Stop(); err != nil {
   221  			pool.Logger.Error("Error stopping requester", "err", err)
   222  		}
   223  		delete(pool.requesters, pool.height)
   224  		pool.height++
   225  		pool.lastAdvance = time.Now()
   226  
   227  		// lastSyncRate is updated every 100 blocks. It uses an adaptive filter (an
   228  		// exponential moving average) to smooth the block sync rate; the unit is blocks per second.
   229  		if (pool.height-pool.startHeight)%100 == 0 {
   230  			newSyncRate := 100 / time.Since(pool.lastHundredBlockTimeStamp).Seconds()
   231  			if pool.lastSyncRate == 0 {
   232  				pool.lastSyncRate = newSyncRate
   233  			} else {
   234  				pool.lastSyncRate = 0.9*pool.lastSyncRate + 0.1*newSyncRate
   235  			}
   236  			pool.lastHundredBlockTimeStamp = time.Now()
   237  		}
   238  
   239  	} else {
   240  		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
   241  	}
   242  }
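
        // Worked example of the smoothing above (made-up numbers): with a previous
        // lastSyncRate of 50 blocks/s and the latest 100-block window measured at
        // 70 blocks/s, the new value is 0.9*50 + 0.1*70 = 52 blocks/s, so one fast
        // or slow window only nudges the reported rate.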
   243  
   244  // RedoRequest invalidates the block at the given height, removes the peer
   245  // responsible for it, and redoes the request from other peers.
   246  // Returns the ID of the removed peer.
   247  func (pool *BlockPool) RedoRequest(height int64) types.NodeID {
   248  	pool.mtx.Lock()
   249  	defer pool.mtx.Unlock()
   250  
   251  	request := pool.requesters[height]
   252  	peerID := request.getPeerID()
   253  	if peerID != types.NodeID("") {
   254  		// RemovePeer will redo all requesters associated with this peer.
   255  		pool.removePeer(peerID)
   256  	}
   257  	return peerID
   258  }
   259  
   260  // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it.
   261  // TODO: ensure that blocks come in order for each peer.
   262  func (pool *BlockPool) AddBlock(peerID types.NodeID, block *types.Block, blockSize int) {
   263  	pool.mtx.Lock()
   264  	defer pool.mtx.Unlock()
   265  
   266  	requester := pool.requesters[block.Height]
   267  	if requester == nil {
   268  		pool.Logger.Error("peer sent us a block we didn't expect",
   269  			"peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
   270  		diff := pool.height - block.Height
   271  		if diff < 0 {
   272  			diff *= -1
   273  		}
   274  		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
   275  			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
   276  		}
   277  		return
   278  	}
   279  
   280  	if requester.setBlock(block, peerID) {
   281  		atomic.AddInt32(&pool.numPending, -1)
   282  		peer := pool.peers[peerID]
   283  		if peer != nil {
   284  			peer.decrPending(blockSize)
   285  		}
   286  	} else {
   287  		err := errors.New("requester is different or block already exists")
   288  		pool.Logger.Error(err.Error(), "peer", peerID, "requester", requester.getPeerID(), "blockHeight", block.Height)
   289  		pool.sendError(err, peerID)
   290  	}
   291  }
   292  
   293  // MaxPeerHeight returns the highest reported height.
   294  func (pool *BlockPool) MaxPeerHeight() int64 {
   295  	pool.mtx.RLock()
   296  	defer pool.mtx.RUnlock()
   297  	return pool.maxPeerHeight
   298  }
   299  
   300  // LastAdvance returns the time when the last block was processed (or start
   301  // time if no blocks were processed).
   302  func (pool *BlockPool) LastAdvance() time.Time {
   303  	pool.mtx.RLock()
   304  	defer pool.mtx.RUnlock()
   305  	return pool.lastAdvance
   306  }
   307  
   308  // SetPeerRange sets the peer's alleged blockchain base and height.
   309  func (pool *BlockPool) SetPeerRange(peerID types.NodeID, base int64, height int64) {
   310  	pool.mtx.Lock()
   311  	defer pool.mtx.Unlock()
   312  
   313  	peer := pool.peers[peerID]
   314  	if peer != nil {
   315  		peer.base = base
   316  		peer.height = height
   317  	} else {
   318  		peer = newBPPeer(pool, peerID, base, height)
   319  		peer.setLogger(pool.Logger.With("peer", peerID))
   320  		pool.peers[peerID] = peer
   321  	}
   322  
   323  	if height > pool.maxPeerHeight {
   324  		pool.maxPeerHeight = height
   325  	}
   326  }
   327  
   328  // RemovePeer removes the peer with peerID from the pool. If there's no peer
   329  // with peerID, the function is a no-op.
   330  func (pool *BlockPool) RemovePeer(peerID types.NodeID) {
   331  	pool.mtx.Lock()
   332  	defer pool.mtx.Unlock()
   333  
   334  	pool.removePeer(peerID)
   335  }
   336  
   337  func (pool *BlockPool) removePeer(peerID types.NodeID) {
   338  	for _, requester := range pool.requesters {
   339  		if requester.getPeerID() == peerID {
   340  			requester.redo(peerID)
   341  		}
   342  	}
   343  
   344  	peer, ok := pool.peers[peerID]
   345  	if ok {
   346  		if peer.timeout != nil {
   347  			peer.timeout.Stop()
   348  		}
   349  
   350  		delete(pool.peers, peerID)
   351  
   352  		// If the removed peer's height was the biggest, recompute maxPeerHeight
   353  		// from the remaining peers.
   354  		if peer.height == pool.maxPeerHeight {
   355  			pool.updateMaxPeerHeight()
   356  		}
   357  	}
   358  }
   359  
   360  // If no peers are left, maxPeerHeight is set to 0.
   361  func (pool *BlockPool) updateMaxPeerHeight() {
   362  	var max int64
   363  	for _, peer := range pool.peers {
   364  		if peer.height > max {
   365  			max = peer.height
   366  		}
   367  	}
   368  	pool.maxPeerHeight = max
   369  }
   370  
   371  // Pick an available peer that can serve the given height and increment its
   372  // pending count. If no such peer is available, returns nil.
   373  func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
   374  	pool.mtx.Lock()
   375  	defer pool.mtx.Unlock()
   376  
   377  	for _, peer := range pool.peers {
   378  		if peer.didTimeout {
   379  			pool.removePeer(peer.id)
   380  			continue
   381  		}
   382  		if peer.numPending >= maxPendingRequestsPerPeer {
   383  			continue
   384  		}
   385  		if height < peer.base || height > peer.height {
   386  			continue
   387  		}
   388  		peer.incrPending()
   389  		return peer
   390  	}
   391  	return nil
   392  }
   393  
   394  func (pool *BlockPool) makeNextRequester() {
   395  	pool.mtx.Lock()
   396  	defer pool.mtx.Unlock()
   397  
   398  	nextHeight := pool.height + pool.requestersLen()
   399  	if nextHeight > pool.maxPeerHeight {
   400  		return
   401  	}
   402  
   403  	request := newBPRequester(pool, nextHeight)
   404  
   405  	pool.requesters[nextHeight] = request
   406  	atomic.AddInt32(&pool.numPending, 1)
   407  
   408  	err := request.Start()
   409  	if err != nil {
   410  		request.Logger.Error("Error starting request", "err", err)
   411  	}
   412  }
   413  
   414  func (pool *BlockPool) requestersLen() int64 {
   415  	return int64(len(pool.requesters))
   416  }
   417  
   418  func (pool *BlockPool) sendRequest(height int64, peerID types.NodeID) {
   419  	if !pool.IsRunning() {
   420  		return
   421  	}
   422  	pool.requestsCh <- BlockRequest{height, peerID}
   423  }
   424  
   425  func (pool *BlockPool) sendError(err error, peerID types.NodeID) {
   426  	if !pool.IsRunning() {
   427  		return
   428  	}
   429  	pool.errorsCh <- peerError{err, peerID}
   430  }
   431  
   432  // for debugging purposes
   433  //nolint:unused
   434  func (pool *BlockPool) debug() string {
   435  	pool.mtx.Lock()
   436  	defer pool.mtx.Unlock()
   437  
   438  	str := ""
   439  	nextHeight := pool.height + pool.requestersLen()
   440  	for h := pool.height; h < nextHeight; h++ {
   441  		if pool.requesters[h] == nil {
   442  			str += fmt.Sprintf("H(%v):X ", h)
   443  		} else {
   444  			str += fmt.Sprintf("H(%v):", h)
   445  			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
   446  		}
   447  	}
   448  	return str
   449  }
   450  
   451  func (pool *BlockPool) targetSyncBlocks() int64 {
   452  	pool.mtx.RLock()
   453  	defer pool.mtx.RUnlock()
   454  
   455  	return pool.maxPeerHeight - pool.startHeight + 1
   456  }
   457  
   458  func (pool *BlockPool) getLastSyncRate() float64 {
   459  	pool.mtx.RLock()
   460  	defer pool.mtx.RUnlock()
   461  
   462  	return pool.lastSyncRate
   463  }
   464  
   465  //-------------------------------------
   466  
   467  type bpPeer struct {
   468  	didTimeout  bool
   469  	numPending  int32
   470  	height      int64
   471  	base        int64
   472  	pool        *BlockPool
   473  	id          types.NodeID
   474  	recvMonitor *flow.Monitor
   475  
   476  	timeout *time.Timer
   477  
   478  	logger log.Logger
   479  }
   480  
   481  func newBPPeer(pool *BlockPool, peerID types.NodeID, base int64, height int64) *bpPeer {
   482  	peer := &bpPeer{
   483  		pool:       pool,
   484  		id:         peerID,
   485  		base:       base,
   486  		height:     height,
   487  		numPending: 0,
   488  		logger:     log.NewNopLogger(),
   489  	}
   490  	return peer
   491  }
   492  
   493  func (peer *bpPeer) setLogger(l log.Logger) {
   494  	peer.logger = l
   495  }
   496  
   497  func (peer *bpPeer) resetMonitor() {
   498  	peer.recvMonitor = flow.New(time.Second, time.Second*40)
   499  	initialValue := float64(minRecvRate) * math.E
   500  	peer.recvMonitor.SetREMA(initialValue)
   501  }
   502  
   503  func (peer *bpPeer) resetTimeout() {
   504  	if peer.timeout == nil {
   505  		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
   506  	} else {
   507  		peer.timeout.Reset(peerTimeout)
   508  	}
   509  }
   510  
   511  func (peer *bpPeer) incrPending() {
   512  	if peer.numPending == 0 {
   513  		peer.resetMonitor()
   514  		peer.resetTimeout()
   515  	}
   516  	peer.numPending++
   517  }
   518  
   519  func (peer *bpPeer) decrPending(recvSize int) {
   520  	peer.numPending--
   521  	if peer.numPending == 0 {
   522  		peer.timeout.Stop()
   523  	} else {
   524  		peer.recvMonitor.Update(recvSize)
   525  		peer.resetTimeout()
   526  	}
   527  }
   528  
   529  func (peer *bpPeer) onTimeout() {
   530  	peer.pool.mtx.Lock()
   531  	defer peer.pool.mtx.Unlock()
   532  
   533  	err := errors.New("peer did not send us anything")
   534  	peer.pool.sendError(err, peer.id)
   535  	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
   536  	peer.didTimeout = true
   537  }
   538  
   539  //-------------------------------------
   540  
   541  type bpRequester struct {
   542  	service.BaseService
   543  	pool       *BlockPool
   544  	height     int64
   545  	gotBlockCh chan struct{}
   546  	redoCh     chan types.NodeID // redo may be sent multiple times; the peer ID identifies repeats
   547  
   548  	mtx    tmsync.Mutex
   549  	peerID types.NodeID
   550  	block  *types.Block
   551  }
   552  
   553  func newBPRequester(pool *BlockPool, height int64) *bpRequester {
   554  	bpr := &bpRequester{
   555  		pool:       pool,
   556  		height:     height,
   557  		gotBlockCh: make(chan struct{}, 1),
   558  		redoCh:     make(chan types.NodeID, 1),
   559  
   560  		peerID: "",
   561  		block:  nil,
   562  	}
   563  	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
   564  	return bpr
   565  }
   566  
   567  func (bpr *bpRequester) OnStart() error {
   568  	go bpr.requestRoutine()
   569  	return nil
   570  }
   571  
   572  // Returns true if the peer matches and the block doesn't already exist.
   573  func (bpr *bpRequester) setBlock(block *types.Block, peerID types.NodeID) bool {
   574  	bpr.mtx.Lock()
   575  	if bpr.block != nil || bpr.peerID != peerID {
   576  		bpr.mtx.Unlock()
   577  		return false
   578  	}
   579  	bpr.block = block
   580  	bpr.mtx.Unlock()
   581  
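        	// gotBlockCh has capacity 1, so this send never blocks: if a signal is
        	// already pending, the new one is simply dropped.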
   582  	select {
   583  	case bpr.gotBlockCh <- struct{}{}:
   584  	default:
   585  	}
   586  	return true
   587  }
   588  
   589  func (bpr *bpRequester) getBlock() *types.Block {
   590  	bpr.mtx.Lock()
   591  	defer bpr.mtx.Unlock()
   592  	return bpr.block
   593  }
   594  
   595  func (bpr *bpRequester) getPeerID() types.NodeID {
   596  	bpr.mtx.Lock()
   597  	defer bpr.mtx.Unlock()
   598  	return bpr.peerID
   599  }
   600  
   601  // This is called from the requestRoutine, upon redo().
   602  func (bpr *bpRequester) reset() {
   603  	bpr.mtx.Lock()
   604  	defer bpr.mtx.Unlock()
   605  
   606  	if bpr.block != nil {
   607  		atomic.AddInt32(&bpr.pool.numPending, 1)
   608  	}
   609  
   610  	bpr.peerID = ""
   611  	bpr.block = nil
   612  }
   613  
   614  // Tells bpRequester to pick another peer and try again.
   615  // NOTE: Nonblocking, and does nothing if another redo
   616  // was already requested.
   617  func (bpr *bpRequester) redo(peerID types.NodeID) {
   618  	select {
   619  	case bpr.redoCh <- peerID:
   620  	default:
   621  	}
   622  }
   623  
   624  // Responsible for making more requests as necessary. A block arriving via
   625  // AddBlock() ends the request phase; the routine runs until the requester or pool is stopped.
   626  func (bpr *bpRequester) requestRoutine() {
   627  OUTER_LOOP:
   628  	for {
   629  		// Pick a peer to send request to.
   630  		var peer *bpPeer
   631  	PICK_PEER_LOOP:
   632  		for {
   633  			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
   634  				return
   635  			}
   636  			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
   637  			if peer == nil {
   638  				time.Sleep(requestIntervalMS * time.Millisecond)
   639  				continue PICK_PEER_LOOP
   640  			}
   641  			break PICK_PEER_LOOP
   642  		}
   643  		bpr.mtx.Lock()
   644  		bpr.peerID = peer.id
   645  		bpr.mtx.Unlock()
   646  
   647  		// Send request and wait.
   648  		bpr.pool.sendRequest(bpr.height, peer.id)
   649  	WAIT_LOOP:
   650  		for {
   651  			select {
   652  			case <-bpr.pool.Quit():
   653  				if err := bpr.Stop(); err != nil {
   654  					bpr.Logger.Error("Error stopping requester", "err", err)
   655  				}
   656  				return
   657  			case <-bpr.Quit():
   658  				return
   659  			case peerID := <-bpr.redoCh:
   660  				if peerID == bpr.peerID {
   661  					bpr.reset()
   662  					continue OUTER_LOOP
   663  				} else {
   664  					continue WAIT_LOOP
   665  				}
   666  			case <-bpr.gotBlockCh:
   667  				// We got a block!
   668  				// Continue the for-loop and wait until Quit.
   669  				continue WAIT_LOOP
   670  			}
   671  		}
   672  	}
   673  }