gitlab.com/gpdionisio/tendermint@v0.34.19-dev2/blockchain/v1/reactor_fsm.go (about)

     1  package v1
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/tendermint/tendermint/libs/log"
    10  	"github.com/tendermint/tendermint/p2p"
    11  	"github.com/tendermint/tendermint/types"
    12  )
    13  
    14  // Blockchain Reactor State
    15  type bcReactorFSMState struct {
    16  	name string
    17  
    18  	// called when transitioning out of current state
    19  	handle func(*BcReactorFSM, bReactorEvent, bReactorEventData) (next *bcReactorFSMState, err error)
    20  	// called when entering the state
    21  	enter func(fsm *BcReactorFSM)
    22  
    23  	// timeout to ensure FSM is not stuck in a state forever
    24  	// the timer is owned and run by the fsm instance
    25  	timeout time.Duration
    26  }
    27  
    28  func (s *bcReactorFSMState) String() string {
    29  	return s.name
    30  }
    31  
    32  // BcReactorFSM is the datastructure for the Blockchain Reactor State Machine
    33  type BcReactorFSM struct {
    34  	logger log.Logger
    35  	mtx    sync.Mutex
    36  
    37  	startTime time.Time
    38  
    39  	state      *bcReactorFSMState
    40  	stateTimer *time.Timer
    41  	pool       *BlockPool
    42  
    43  	// interface used to call the Blockchain reactor to send StatusRequest, BlockRequest, reporting errors, etc.
    44  	toBcR bcReactor
    45  }
    46  
    47  // NewFSM creates a new reactor FSM.
    48  func NewFSM(height int64, toBcR bcReactor) *BcReactorFSM {
    49  	return &BcReactorFSM{
    50  		state:     unknown,
    51  		startTime: time.Now(),
    52  		pool:      NewBlockPool(height, toBcR),
    53  		toBcR:     toBcR,
    54  	}
    55  }
    56  
    57  // bReactorEventData is part of the message sent by the reactor to the FSM and used by the state handlers.
    58  type bReactorEventData struct {
    59  	peerID         p2p.ID
    60  	err            error        // for peer error: timeout, slow; for processed block event if error occurred
    61  	base           int64        // for status response
    62  	height         int64        // for status response; for processed block event
    63  	block          *types.Block // for block response
    64  	stateName      string       // for state timeout events
    65  	length         int          // for block response event, length of received block, used to detect slow peers
    66  	maxNumRequests int          // for request needed event, maximum number of pending requests
    67  }
    68  
    69  // Blockchain Reactor Events (the input to the state machine)
    70  type bReactorEvent uint
    71  
    72  const (
    73  	// message type events
    74  	startFSMEv = iota + 1
    75  	statusResponseEv
    76  	blockResponseEv
    77  	noBlockResponseEv
    78  	processedBlockEv
    79  	makeRequestsEv
    80  	stopFSMEv
    81  
    82  	// other events
    83  	peerRemoveEv = iota + 256
    84  	stateTimeoutEv
    85  )
    86  
    87  func (msg *bcReactorMessage) String() string {
    88  	var dataStr string
    89  
    90  	switch msg.event {
    91  	case startFSMEv:
    92  		dataStr = ""
    93  	case statusResponseEv:
    94  		dataStr = fmt.Sprintf("peer=%v base=%v height=%v", msg.data.peerID, msg.data.base, msg.data.height)
    95  	case blockResponseEv:
    96  		dataStr = fmt.Sprintf("peer=%v block.height=%v length=%v",
    97  			msg.data.peerID, msg.data.block.Height, msg.data.length)
    98  	case noBlockResponseEv:
    99  		dataStr = fmt.Sprintf("peer=%v requested height=%v",
   100  			msg.data.peerID, msg.data.height)
   101  	case processedBlockEv:
   102  		dataStr = fmt.Sprintf("error=%v", msg.data.err)
   103  	case makeRequestsEv:
   104  		dataStr = ""
   105  	case stopFSMEv:
   106  		dataStr = ""
   107  	case peerRemoveEv:
   108  		dataStr = fmt.Sprintf("peer: %v is being removed by the switch", msg.data.peerID)
   109  	case stateTimeoutEv:
   110  		dataStr = fmt.Sprintf("state=%v", msg.data.stateName)
   111  	default:
   112  		dataStr = "cannot interpret message data"
   113  	}
   114  
   115  	return fmt.Sprintf("%v: %v", msg.event, dataStr)
   116  }
   117  
   118  func (ev bReactorEvent) String() string {
   119  	switch ev {
   120  	case startFSMEv:
   121  		return "startFSMEv"
   122  	case statusResponseEv:
   123  		return "statusResponseEv"
   124  	case blockResponseEv:
   125  		return "blockResponseEv"
   126  	case noBlockResponseEv:
   127  		return "noBlockResponseEv"
   128  	case processedBlockEv:
   129  		return "processedBlockEv"
   130  	case makeRequestsEv:
   131  		return "makeRequestsEv"
   132  	case stopFSMEv:
   133  		return "stopFSMEv"
   134  	case peerRemoveEv:
   135  		return "peerRemoveEv"
   136  	case stateTimeoutEv:
   137  		return "stateTimeoutEv"
   138  	default:
   139  		return "event unknown"
   140  	}
   141  
   142  }
   143  
   144  // states
   145  var (
   146  	unknown      *bcReactorFSMState
   147  	waitForPeer  *bcReactorFSMState
   148  	waitForBlock *bcReactorFSMState
   149  	finished     *bcReactorFSMState
   150  )
   151  
   152  // timeouts for state timers
   153  const (
   154  	waitForPeerTimeout                 = 3 * time.Second
   155  	waitForBlockAtCurrentHeightTimeout = 10 * time.Second
   156  )
   157  
   158  // errors
   159  var (
   160  	// internal to the package
   161  	errNoErrorFinished        = errors.New("fast sync is finished")
   162  	errInvalidEvent           = errors.New("invalid event in current state")
   163  	errMissingBlock           = errors.New("missing blocks")
   164  	errNilPeerForBlockRequest = errors.New("peer for block request does not exist in the switch")
   165  	errSendQueueFull          = errors.New("block request not made, send-queue is full")
   166  	errPeerTooShort           = errors.New("peer height too low, old peer removed/ new peer not added")
   167  	errSwitchRemovesPeer      = errors.New("switch is removing peer")
   168  	errTimeoutEventWrongState = errors.New("timeout event for a state different than the current one")
   169  	errNoTallerPeer           = errors.New("fast sync timed out on waiting for a peer taller than this node")
   170  
   171  	// reported eventually to the switch
   172  	// handle return
   173  	errPeerLowersItsHeight = errors.New("fast sync peer reports a height lower than previous")
   174  	// handle return
   175  	errNoPeerResponseForCurrentHeights = errors.New("fast sync timed out on peer block response for current heights")
   176  	errNoPeerResponse                  = errors.New("fast sync timed out on peer block response")               // xx
   177  	errBadDataFromPeer                 = errors.New("fast sync received block from wrong peer or block is bad") // xx
   178  	errDuplicateBlock                  = errors.New("fast sync received duplicate block from peer")
   179  	errBlockVerificationFailure        = errors.New("fast sync block verification failure")              // xx
   180  	errSlowPeer                        = errors.New("fast sync peer is not sending us data fast enough") // xx
   181  
   182  )
   183  
   184  func init() {
   185  	unknown = &bcReactorFSMState{
   186  		name: "unknown",
   187  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   188  			switch ev {
   189  			case startFSMEv:
   190  				// Broadcast Status message. Currently doesn't return non-nil error.
   191  				fsm.toBcR.sendStatusRequest()
   192  				return waitForPeer, nil
   193  
   194  			case stopFSMEv:
   195  				return finished, errNoErrorFinished
   196  
   197  			default:
   198  				return unknown, errInvalidEvent
   199  			}
   200  		},
   201  	}
   202  
   203  	waitForPeer = &bcReactorFSMState{
   204  		name:    "waitForPeer",
   205  		timeout: waitForPeerTimeout,
   206  		enter: func(fsm *BcReactorFSM) {
   207  			// Stop when leaving the state.
   208  			fsm.resetStateTimer()
   209  		},
   210  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   211  			switch ev {
   212  			case stateTimeoutEv:
   213  				if data.stateName != "waitForPeer" {
   214  					fsm.logger.Error("received a state timeout event for different state",
   215  						"state", data.stateName)
   216  					return waitForPeer, errTimeoutEventWrongState
   217  				}
   218  				// There was no statusResponse received from any peer.
   219  				// Should we send status request again?
   220  				return finished, errNoTallerPeer
   221  
   222  			case statusResponseEv:
   223  				if err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height); err != nil {
   224  					if fsm.pool.NumPeers() == 0 {
   225  						return waitForPeer, err
   226  					}
   227  				}
   228  				if fsm.stateTimer != nil {
   229  					fsm.stateTimer.Stop()
   230  				}
   231  				return waitForBlock, nil
   232  
   233  			case stopFSMEv:
   234  				if fsm.stateTimer != nil {
   235  					fsm.stateTimer.Stop()
   236  				}
   237  				return finished, errNoErrorFinished
   238  
   239  			default:
   240  				return waitForPeer, errInvalidEvent
   241  			}
   242  		},
   243  	}
   244  
   245  	waitForBlock = &bcReactorFSMState{
   246  		name:    "waitForBlock",
   247  		timeout: waitForBlockAtCurrentHeightTimeout,
   248  		enter: func(fsm *BcReactorFSM) {
   249  			// Stop when leaving the state.
   250  			fsm.resetStateTimer()
   251  		},
   252  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   253  			switch ev {
   254  
   255  			case statusResponseEv:
   256  				err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height)
   257  				if fsm.pool.NumPeers() == 0 {
   258  					return waitForPeer, err
   259  				}
   260  				if fsm.pool.ReachedMaxHeight() {
   261  					return finished, err
   262  				}
   263  				return waitForBlock, err
   264  
   265  			case blockResponseEv:
   266  				fsm.logger.Debug("blockResponseEv", "H", data.block.Height)
   267  				err := fsm.pool.AddBlock(data.peerID, data.block, data.length)
   268  				if err != nil {
   269  					// A block was received that was unsolicited, from unexpected peer, or that we already have it.
   270  					// Ignore block, remove peer and send error to switch.
   271  					fsm.pool.RemovePeer(data.peerID, err)
   272  					fsm.toBcR.sendPeerError(err, data.peerID)
   273  				}
   274  				if fsm.pool.NumPeers() == 0 {
   275  					return waitForPeer, err
   276  				}
   277  				return waitForBlock, err
   278  			case noBlockResponseEv:
   279  				fsm.logger.Error("peer does not have requested block", "peer", data.peerID)
   280  
   281  				return waitForBlock, nil
   282  			case processedBlockEv:
   283  				if data.err != nil {
   284  					first, second, _ := fsm.pool.FirstTwoBlocksAndPeers()
   285  					fsm.logger.Error("error processing block", "err", data.err,
   286  						"first", first.block.Height, "second", second.block.Height)
   287  					fsm.logger.Error("send peer error for", "peer", first.peer.ID)
   288  					fsm.toBcR.sendPeerError(data.err, first.peer.ID)
   289  					fsm.logger.Error("send peer error for", "peer", second.peer.ID)
   290  					fsm.toBcR.sendPeerError(data.err, second.peer.ID)
   291  					// Remove the first two blocks. This will also remove the peers
   292  					fsm.pool.InvalidateFirstTwoBlocks(data.err)
   293  				} else {
   294  					fsm.pool.ProcessedCurrentHeightBlock()
   295  					// Since we advanced one block reset the state timer
   296  					fsm.resetStateTimer()
   297  				}
   298  
   299  				// Both cases above may result in achieving maximum height.
   300  				if fsm.pool.ReachedMaxHeight() {
   301  					return finished, nil
   302  				}
   303  
   304  				return waitForBlock, data.err
   305  
   306  			case peerRemoveEv:
   307  				// This event is sent by the switch to remove disconnected and errored peers.
   308  				fsm.pool.RemovePeer(data.peerID, data.err)
   309  				if fsm.pool.NumPeers() == 0 {
   310  					return waitForPeer, nil
   311  				}
   312  				if fsm.pool.ReachedMaxHeight() {
   313  					return finished, nil
   314  				}
   315  				return waitForBlock, nil
   316  
   317  			case makeRequestsEv:
   318  				fsm.makeNextRequests(data.maxNumRequests)
   319  				return waitForBlock, nil
   320  
   321  			case stateTimeoutEv:
   322  				if data.stateName != "waitForBlock" {
   323  					fsm.logger.Error("received a state timeout event for different state",
   324  						"state", data.stateName)
   325  					return waitForBlock, errTimeoutEventWrongState
   326  				}
   327  				// We haven't received the block at current height or height+1. Remove peer.
   328  				fsm.pool.RemovePeerAtCurrentHeights(errNoPeerResponseForCurrentHeights)
   329  				fsm.resetStateTimer()
   330  				if fsm.pool.NumPeers() == 0 {
   331  					return waitForPeer, errNoPeerResponseForCurrentHeights
   332  				}
   333  				if fsm.pool.ReachedMaxHeight() {
   334  					return finished, nil
   335  				}
   336  				return waitForBlock, errNoPeerResponseForCurrentHeights
   337  
   338  			case stopFSMEv:
   339  				if fsm.stateTimer != nil {
   340  					fsm.stateTimer.Stop()
   341  				}
   342  				return finished, errNoErrorFinished
   343  
   344  			default:
   345  				return waitForBlock, errInvalidEvent
   346  			}
   347  		},
   348  	}
   349  
   350  	finished = &bcReactorFSMState{
   351  		name: "finished",
   352  		enter: func(fsm *BcReactorFSM) {
   353  			fsm.logger.Info("Time to switch to consensus reactor!", "height", fsm.pool.Height)
   354  			fsm.toBcR.switchToConsensus()
   355  			fsm.cleanup()
   356  		},
   357  		handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) {
   358  			return finished, nil
   359  		},
   360  	}
   361  }
   362  
   363  // Interface used by FSM for sending Block and Status requests,
   364  // informing of peer errors and state timeouts
   365  // Implemented by BlockchainReactor and tests
   366  type bcReactor interface {
   367  	sendStatusRequest()
   368  	sendBlockRequest(peerID p2p.ID, height int64) error
   369  	sendPeerError(err error, peerID p2p.ID)
   370  	resetStateTimer(name string, timer **time.Timer, timeout time.Duration)
   371  	switchToConsensus()
   372  }
   373  
   374  // SetLogger sets the FSM logger.
   375  func (fsm *BcReactorFSM) SetLogger(l log.Logger) {
   376  	fsm.logger = l
   377  	fsm.pool.SetLogger(l)
   378  }
   379  
   380  // Start starts the FSM.
   381  func (fsm *BcReactorFSM) Start() {
   382  	_ = fsm.Handle(&bcReactorMessage{event: startFSMEv})
   383  }
   384  
   385  // Handle processes messages and events sent to the FSM.
   386  func (fsm *BcReactorFSM) Handle(msg *bcReactorMessage) error {
   387  	fsm.mtx.Lock()
   388  	defer fsm.mtx.Unlock()
   389  	fsm.logger.Debug("FSM received", "event", msg, "state", fsm.state)
   390  
   391  	if fsm.state == nil {
   392  		fsm.state = unknown
   393  	}
   394  	next, err := fsm.state.handle(fsm, msg.event, msg.data)
   395  	if err != nil {
   396  		fsm.logger.Error("FSM event handler returned", "err", err,
   397  			"state", fsm.state, "event", msg.event)
   398  	}
   399  
   400  	oldState := fsm.state.name
   401  	fsm.transition(next)
   402  	if oldState != fsm.state.name {
   403  		fsm.logger.Info("FSM changed state", "new_state", fsm.state)
   404  	}
   405  	return err
   406  }
   407  
   408  func (fsm *BcReactorFSM) transition(next *bcReactorFSMState) {
   409  	if next == nil {
   410  		return
   411  	}
   412  	if fsm.state != next {
   413  		fsm.state = next
   414  		if next.enter != nil {
   415  			next.enter(fsm)
   416  		}
   417  	}
   418  }
   419  
   420  // Called when entering an FSM state in order to detect lack of progress in the state machine.
   421  // Note the use of the 'bcr' interface to facilitate testing without timer expiring.
   422  func (fsm *BcReactorFSM) resetStateTimer() {
   423  	fsm.toBcR.resetStateTimer(fsm.state.name, &fsm.stateTimer, fsm.state.timeout)
   424  }
   425  
   426  func (fsm *BcReactorFSM) isCaughtUp() bool {
   427  	return fsm.state == finished
   428  }
   429  
   430  func (fsm *BcReactorFSM) makeNextRequests(maxNumRequests int) {
   431  	fsm.pool.MakeNextRequests(maxNumRequests)
   432  }
   433  
   434  func (fsm *BcReactorFSM) cleanup() {
   435  	fsm.pool.Cleanup()
   436  }
   437  
   438  // NeedsBlocks checks if more block requests are required.
   439  func (fsm *BcReactorFSM) NeedsBlocks() bool {
   440  	fsm.mtx.Lock()
   441  	defer fsm.mtx.Unlock()
   442  	return fsm.state.name == "waitForBlock" && fsm.pool.NeedsBlocks()
   443  }
   444  
   445  // FirstTwoBlocks returns the two blocks at pool height and height+1
   446  func (fsm *BcReactorFSM) FirstTwoBlocks() (first, second *types.Block, err error) {
   447  	fsm.mtx.Lock()
   448  	defer fsm.mtx.Unlock()
   449  	firstBP, secondBP, err := fsm.pool.FirstTwoBlocksAndPeers()
   450  	if err == nil {
   451  		first = firstBP.block
   452  		second = secondBP.block
   453  	}
   454  	return
   455  }
   456  
   457  // Status returns the pool's height and the maximum peer height.
   458  func (fsm *BcReactorFSM) Status() (height, maxPeerHeight int64) {
   459  	fsm.mtx.Lock()
   460  	defer fsm.mtx.Unlock()
   461  	return fsm.pool.Height, fsm.pool.MaxPeerHeight
   462  }