github.com/vipernet-xyz/tendermint-core@v0.32.0/blockchain/v1/reactor_fsm.go (about)

     1  package v1
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/tendermint/tendermint/libs/log"
    10  	"github.com/tendermint/tendermint/p2p"
    11  	"github.com/tendermint/tendermint/types"
    12  )
    13  
    14  // Blockchain Reactor State
    15  type bcReactorFSMState struct {
    16  	name string
    17  
    18  	// called when transitioning out of current state
    19  	handle func(*BcReactorFSM, bReactorEvent, bReactorEventData) (next *bcReactorFSMState, err error)
    20  	// called when entering the state
    21  	enter func(fsm *BcReactorFSM)
    22  
    23  	// timeout to ensure FSM is not stuck in a state forever
    24  	// the timer is owned and run by the fsm instance
    25  	timeout time.Duration
    26  }
    27  
    28  func (s *bcReactorFSMState) String() string {
    29  	return s.name
    30  }
    31  
    32  // BcReactorFSM is the datastructure for the Blockchain Reactor State Machine
    33  type BcReactorFSM struct {
    34  	logger log.Logger
    35  	mtx    sync.Mutex
    36  
    37  	startTime time.Time
    38  
    39  	state      *bcReactorFSMState
    40  	stateTimer *time.Timer
    41  	pool       *BlockPool
    42  
    43  	// interface used to call the Blockchain reactor to send StatusRequest, BlockRequest, reporting errors, etc.
    44  	toBcR bcReactor
    45  }
    46  
    47  // NewFSM creates a new reactor FSM.
    48  func NewFSM(height int64, toBcR bcReactor) *BcReactorFSM {
    49  	return &BcReactorFSM{
    50  		state:     unknown,
    51  		startTime: time.Now(),
    52  		pool:      NewBlockPool(height, toBcR),
    53  		toBcR:     toBcR,
    54  	}
    55  }
    56  
    57  // bReactorEventData is part of the message sent by the reactor to the FSM and used by the state handlers.
    58  type bReactorEventData struct {
    59  	peerID         p2p.ID
    60  	err            error        // for peer error: timeout, slow; for processed block event if error occurred
    61  	base           int64        // for status response
    62  	height         int64        // for status response; for processed block event
    63  	block          *types.Block // for block response
    64  	stateName      string       // for state timeout events
    65  	length         int          // for block response event, length of received block, used to detect slow peers
    66  	maxNumRequests int          // for request needed event, maximum number of pending requests
    67  }
    68  
    69  // Blockchain Reactor Events (the input to the state machine)
    70  type bReactorEvent uint
    71  
    72  const (
    73  	// message type events
    74  	startFSMEv = iota + 1
    75  	statusResponseEv
    76  	blockResponseEv
    77  	processedBlockEv
    78  	makeRequestsEv
    79  	stopFSMEv
    80  
    81  	// other events
    82  	peerRemoveEv = iota + 256
    83  	stateTimeoutEv
    84  )
    85  
    86  func (msg *bcReactorMessage) String() string {
    87  	var dataStr string
    88  
    89  	switch msg.event {
    90  	case startFSMEv:
    91  		dataStr = ""
    92  	case statusResponseEv:
    93  		dataStr = fmt.Sprintf("peer=%v base=%v height=%v", msg.data.peerID, msg.data.base, msg.data.height)
    94  	case blockResponseEv:
    95  		dataStr = fmt.Sprintf("peer=%v block.height=%v length=%v",
    96  			msg.data.peerID, msg.data.block.Height, msg.data.length)
    97  	case processedBlockEv:
    98  		dataStr = fmt.Sprintf("error=%v", msg.data.err)
    99  	case makeRequestsEv:
   100  		dataStr = ""
   101  	case stopFSMEv:
   102  		dataStr = ""
   103  	case peerRemoveEv:
   104  		dataStr = fmt.Sprintf("peer: %v is being removed by the switch", msg.data.peerID)
   105  	case stateTimeoutEv:
   106  		dataStr = fmt.Sprintf("state=%v", msg.data.stateName)
   107  	default:
   108  		dataStr = fmt.Sprintf("cannot interpret message data")
   109  	}
   110  
   111  	return fmt.Sprintf("%v: %v", msg.event, dataStr)
   112  }
   113  
   114  func (ev bReactorEvent) String() string {
   115  	switch ev {
   116  	case startFSMEv:
   117  		return "startFSMEv"
   118  	case statusResponseEv:
   119  		return "statusResponseEv"
   120  	case blockResponseEv:
   121  		return "blockResponseEv"
   122  	case processedBlockEv:
   123  		return "processedBlockEv"
   124  	case makeRequestsEv:
   125  		return "makeRequestsEv"
   126  	case stopFSMEv:
   127  		return "stopFSMEv"
   128  	case peerRemoveEv:
   129  		return "peerRemoveEv"
   130  	case stateTimeoutEv:
   131  		return "stateTimeoutEv"
   132  	default:
   133  		return "event unknown"
   134  	}
   135  
   136  }
   137  
   138  // states
   139  var (
   140  	unknown      *bcReactorFSMState
   141  	waitForPeer  *bcReactorFSMState
   142  	waitForBlock *bcReactorFSMState
   143  	finished     *bcReactorFSMState
   144  )
   145  
   146  // timeouts for state timers
   147  const (
   148  	waitForPeerTimeout                 = 10 * time.Second
   149  	waitForBlockAtCurrentHeightTimeout = 60 * time.Second
   150  )
   151  
   152  // errors
   153  var (
   154  	// internal to the package
   155  	errNoErrorFinished        = errors.New("fast sync is finished")
   156  	errInvalidEvent           = errors.New("invalid event in current state")
   157  	errMissingBlock           = errors.New("missing blocks")
   158  	errNilPeerForBlockRequest = errors.New("peer for block request does not exist in the switch")
   159  	errSendQueueFull          = errors.New("block request not made, send-queue is full")
   160  	errPeerTooShort           = errors.New("peer height too low, old peer removed/ new peer not added")
   161  	errSwitchRemovesPeer      = errors.New("switch is removing peer")
   162  	errTimeoutEventWrongState = errors.New("timeout event for a state different than the current one")
   163  	errNoTallerPeer           = errors.New("fast sync timed out on waiting for a peer taller than this node")
   164  
   165  	// reported eventually to the switch
   166  	// handle return
   167  	errPeerLowersItsHeight = errors.New("fast sync peer reports a height lower than previous")
   168  	// handle return
   169  	errNoPeerResponseForCurrentHeights = errors.New("fast sync timed out on peer block response for current heights")
   170  	errNoPeerResponse                  = errors.New("fast sync timed out on peer block response")               // xx
   171  	errBadDataFromPeer                 = errors.New("fast sync received block from wrong peer or block is bad") // xx
   172  	errDuplicateBlock                  = errors.New("fast sync received duplicate block from peer")
   173  	errBlockVerificationFailure        = errors.New("fast sync block verification failure")              // xx
   174  	errSlowPeer                        = errors.New("fast sync peer is not sending us data fast enough") // xx
   175  
   176  )
   177  
   178  func init() {
   179  	unknown = &bcReactorFSMState{
   180  		name: "unknown",
   181  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   182  			switch ev {
   183  			case startFSMEv:
   184  				// Broadcast Status message. Currently doesn't return non-nil error.
   185  				fsm.toBcR.sendStatusRequest()
   186  				return waitForPeer, nil
   187  
   188  			case stopFSMEv:
   189  				return finished, errNoErrorFinished
   190  
   191  			default:
   192  				return unknown, errInvalidEvent
   193  			}
   194  		},
   195  	}
   196  
   197  	waitForPeer = &bcReactorFSMState{
   198  		name:    "waitForPeer",
   199  		timeout: waitForPeerTimeout,
   200  		enter: func(fsm *BcReactorFSM) {
   201  			// Stop when leaving the state.
   202  			fsm.resetStateTimer()
   203  		},
   204  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   205  			switch ev {
   206  			case stateTimeoutEv:
   207  				if data.stateName != "waitForPeer" {
   208  					fsm.logger.Error("received a state timeout event for different state",
   209  						"state", data.stateName)
   210  					return waitForPeer, errTimeoutEventWrongState
   211  				}
   212  				// There was no statusResponse received from any peer.
   213  				// Should we send status request again?
   214  				return finished, errNoTallerPeer
   215  
   216  			case statusResponseEv:
   217  				if err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height); err != nil {
   218  					if fsm.pool.NumPeers() == 0 {
   219  						return waitForPeer, err
   220  					}
   221  				}
   222  				if fsm.stateTimer != nil {
   223  					fsm.stateTimer.Stop()
   224  				}
   225  				return waitForBlock, nil
   226  
   227  			case stopFSMEv:
   228  				if fsm.stateTimer != nil {
   229  					fsm.stateTimer.Stop()
   230  				}
   231  				return finished, errNoErrorFinished
   232  
   233  			default:
   234  				return waitForPeer, errInvalidEvent
   235  			}
   236  		},
   237  	}
   238  
   239  	waitForBlock = &bcReactorFSMState{
   240  		name:    "waitForBlock",
   241  		timeout: waitForBlockAtCurrentHeightTimeout,
   242  		enter: func(fsm *BcReactorFSM) {
   243  			// Stop when leaving the state.
   244  			fsm.resetStateTimer()
   245  		},
   246  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   247  			switch ev {
   248  
   249  			case statusResponseEv:
   250  				err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height)
   251  				if fsm.pool.NumPeers() == 0 {
   252  					return waitForPeer, err
   253  				}
   254  				if fsm.pool.ReachedMaxHeight() {
   255  					return finished, err
   256  				}
   257  				return waitForBlock, err
   258  
   259  			case blockResponseEv:
   260  				fsm.logger.Debug("blockResponseEv", "H", data.block.Height)
   261  				err := fsm.pool.AddBlock(data.peerID, data.block, data.length)
   262  				if err != nil {
   263  					// A block was received that was unsolicited, from unexpected peer, or that we already have it.
   264  					// Ignore block, remove peer and send error to switch.
   265  					fsm.pool.RemovePeer(data.peerID, err)
   266  					fsm.toBcR.sendPeerError(err, data.peerID)
   267  				}
   268  				if fsm.pool.NumPeers() == 0 {
   269  					return waitForPeer, err
   270  				}
   271  				return waitForBlock, err
   272  
   273  			case processedBlockEv:
   274  				if data.err != nil {
   275  					first, second, _ := fsm.pool.FirstTwoBlocksAndPeers()
   276  					fsm.logger.Error("error processing block", "err", data.err,
   277  						"first", first.block.Height, "second", second.block.Height)
   278  					fsm.logger.Error("send peer error for", "peer", first.peer.ID)
   279  					fsm.toBcR.sendPeerError(data.err, first.peer.ID)
   280  					fsm.logger.Error("send peer error for", "peer", second.peer.ID)
   281  					fsm.toBcR.sendPeerError(data.err, second.peer.ID)
   282  					// Remove the first two blocks. This will also remove the peers
   283  					fsm.pool.InvalidateFirstTwoBlocks(data.err)
   284  				} else {
   285  					fsm.pool.ProcessedCurrentHeightBlock()
   286  					// Since we advanced one block reset the state timer
   287  					fsm.resetStateTimer()
   288  				}
   289  
   290  				// Both cases above may result in achieving maximum height.
   291  				if fsm.pool.ReachedMaxHeight() {
   292  					return finished, nil
   293  				}
   294  
   295  				return waitForBlock, data.err
   296  
   297  			case peerRemoveEv:
   298  				// This event is sent by the switch to remove disconnected and errored peers.
   299  				fsm.pool.RemovePeer(data.peerID, data.err)
   300  				if fsm.pool.NumPeers() == 0 {
   301  					return waitForPeer, nil
   302  				}
   303  				if fsm.pool.ReachedMaxHeight() {
   304  					return finished, nil
   305  				}
   306  				return waitForBlock, nil
   307  
   308  			case makeRequestsEv:
   309  				fsm.makeNextRequests(data.maxNumRequests)
   310  				return waitForBlock, nil
   311  
   312  			case stateTimeoutEv:
   313  				if data.stateName != "waitForBlock" {
   314  					fsm.logger.Error("received a state timeout event for different state",
   315  						"state", data.stateName)
   316  					return waitForBlock, errTimeoutEventWrongState
   317  				}
   318  				// We haven't received the block at current height or height+1. Remove peer.
   319  				fsm.pool.RemovePeerAtCurrentHeights(errNoPeerResponseForCurrentHeights)
   320  				fsm.resetStateTimer()
   321  				if fsm.pool.NumPeers() == 0 {
   322  					return waitForPeer, errNoPeerResponseForCurrentHeights
   323  				}
   324  				if fsm.pool.ReachedMaxHeight() {
   325  					return finished, nil
   326  				}
   327  				return waitForBlock, errNoPeerResponseForCurrentHeights
   328  
   329  			case stopFSMEv:
   330  				if fsm.stateTimer != nil {
   331  					fsm.stateTimer.Stop()
   332  				}
   333  				return finished, errNoErrorFinished
   334  
   335  			default:
   336  				return waitForBlock, errInvalidEvent
   337  			}
   338  		},
   339  	}
   340  
   341  	finished = &bcReactorFSMState{
   342  		name: "finished",
   343  		enter: func(fsm *BcReactorFSM) {
   344  			fsm.logger.Info("Time to switch to consensus reactor!", "height", fsm.pool.Height)
   345  			fsm.toBcR.switchToConsensus()
   346  			fsm.cleanup()
   347  		},
   348  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   349  			return finished, nil
   350  		},
   351  	}
   352  }
   353  
   354  // Interface used by FSM for sending Block and Status requests,
   355  // informing of peer errors and state timeouts
   356  // Implemented by BlockchainReactor and tests
   357  type bcReactor interface {
   358  	sendStatusRequest()
   359  	sendBlockRequest(peerID p2p.ID, height int64) error
   360  	sendPeerError(err error, peerID p2p.ID)
   361  	resetStateTimer(name string, timer **time.Timer, timeout time.Duration)
   362  	switchToConsensus()
   363  }
   364  
   365  // SetLogger sets the FSM logger.
   366  func (fsm *BcReactorFSM) SetLogger(l log.Logger) {
   367  	fsm.logger = l
   368  	fsm.pool.SetLogger(l)
   369  }
   370  
   371  // Start starts the FSM.
   372  func (fsm *BcReactorFSM) Start() {
   373  	_ = fsm.Handle(&bcReactorMessage{event: startFSMEv})
   374  }
   375  
   376  // Handle processes messages and events sent to the FSM.
   377  func (fsm *BcReactorFSM) Handle(msg *bcReactorMessage) error {
   378  	fsm.mtx.Lock()
   379  	defer fsm.mtx.Unlock()
   380  	fsm.logger.Debug("FSM received", "event", msg, "state", fsm.state)
   381  
   382  	if fsm.state == nil {
   383  		fsm.state = unknown
   384  	}
   385  	next, err := fsm.state.handle(fsm, msg.event, msg.data)
   386  	if err != nil {
   387  		fsm.logger.Error("FSM event handler returned", "err", err,
   388  			"state", fsm.state, "event", msg.event)
   389  	}
   390  
   391  	oldState := fsm.state.name
   392  	fsm.transition(next)
   393  	if oldState != fsm.state.name {
   394  		fsm.logger.Info("FSM changed state", "new_state", fsm.state)
   395  	}
   396  	return err
   397  }
   398  
   399  func (fsm *BcReactorFSM) transition(next *bcReactorFSMState) {
   400  	if next == nil {
   401  		return
   402  	}
   403  	if fsm.state != next {
   404  		fsm.state = next
   405  		if next.enter != nil {
   406  			next.enter(fsm)
   407  		}
   408  	}
   409  }
   410  
   411  // Called when entering an FSM state in order to detect lack of progress in the state machine.
   412  // Note the use of the 'bcr' interface to facilitate testing without timer expiring.
   413  func (fsm *BcReactorFSM) resetStateTimer() {
   414  	fsm.toBcR.resetStateTimer(fsm.state.name, &fsm.stateTimer, fsm.state.timeout)
   415  }
   416  
   417  func (fsm *BcReactorFSM) isCaughtUp() bool {
   418  	return fsm.state == finished
   419  }
   420  
   421  func (fsm *BcReactorFSM) makeNextRequests(maxNumRequests int) {
   422  	fsm.pool.MakeNextRequests(maxNumRequests)
   423  }
   424  
   425  func (fsm *BcReactorFSM) cleanup() {
   426  	fsm.pool.Cleanup()
   427  }
   428  
   429  // NeedsBlocks checks if more block requests are required.
   430  func (fsm *BcReactorFSM) NeedsBlocks() bool {
   431  	fsm.mtx.Lock()
   432  	defer fsm.mtx.Unlock()
   433  	return fsm.state.name == "waitForBlock" && fsm.pool.NeedsBlocks()
   434  }
   435  
   436  // FirstTwoBlocks returns the two blocks at pool height and height+1
   437  func (fsm *BcReactorFSM) FirstTwoBlocks() (first, second *types.Block, err error) {
   438  	fsm.mtx.Lock()
   439  	defer fsm.mtx.Unlock()
   440  	firstBP, secondBP, err := fsm.pool.FirstTwoBlocksAndPeers()
   441  	if err == nil {
   442  		first = firstBP.block
   443  		second = secondBP.block
   444  	}
   445  	return
   446  }
   447  
   448  // Status returns the pool's height and the maximum peer height.
   449  func (fsm *BcReactorFSM) Status() (height, maxPeerHeight int64) {
   450  	fsm.mtx.Lock()
   451  	defer fsm.mtx.Unlock()
   452  	return fsm.pool.Height, fsm.pool.MaxPeerHeight
   453  }