github.com/fibonacci-chain/fbc@v0.0.0-20231124064014-c7636198c1e9/libs/tendermint/consensus/consensus_main_routine.go (about)

     1  package consensus
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"reflect"
     7  	"runtime/debug"
     8  	"time"
     9  
    10  	cfg "github.com/fibonacci-chain/fbc/libs/tendermint/config"
    11  	cstypes "github.com/fibonacci-chain/fbc/libs/tendermint/consensus/types"
    12  	"github.com/fibonacci-chain/fbc/libs/tendermint/libs/fail"
    13  	"github.com/fibonacci-chain/fbc/libs/tendermint/types"
    14  	tmtime "github.com/fibonacci-chain/fbc/libs/tendermint/types/time"
    15  )
    16  
    17  //-----------------------------------------
    18  // the main go routines
    19  
    20  // receiveRoutine handles messages which may cause state transitions.
    21  // it's argument (n) is the number of messages to process before exiting - use 0 to run forever
    22  // It keeps the RoundState and is the only thing that updates it.
    23  // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
    24  // State must be locked before any internal state is updated.
    25  func (cs *State) receiveRoutine(maxSteps int) {
    26  	onExit := func(cs *State) {
    27  		// NOTE: the internalMsgQueue may have signed messages from our
    28  		// priv_val that haven't hit the WAL, but its ok because
    29  		// priv_val tracks LastSig
    30  
    31  		// close wal now that we're done writing to it
    32  		cs.wal.Stop()
    33  		cs.wal.Wait()
    34  
    35  		close(cs.done)
    36  		cs.done = nil
    37  	}
    38  
    39  	defer func() {
    40  		if r := recover(); r != nil {
    41  			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
    42  			// stop gracefully
    43  			//
    44  			// NOTE: We most probably shouldn't be running any further when there is
    45  			// some unexpected panic. Some unknown error happened, and so we don't
    46  			// know if that will result in the validator signing an invalid thing. It
    47  			// might be worthwhile to explore a mechanism for manual resuming via
    48  			// some console or secure RPC system, but for now, halting the chain upon
    49  			// unexpected consensus bugs sounds like the better option.
    50  			onExit(cs)
    51  		}
    52  	}()
    53  
    54  	for {
    55  		if maxSteps > 0 {
    56  			if cs.nSteps >= maxSteps {
    57  				cs.Logger.Info("reached max steps. exiting receive routine")
    58  				cs.nSteps = 0
    59  				return
    60  			}
    61  		}
    62  		rs := cs.RoundState
    63  		var mi msgInfo
    64  
    65  		select {
    66  		case <-cs.txNotifier.TxsAvailable():
    67  			cs.handleTxsAvailable()
    68  		case mi = <-cs.peerMsgQueue:
    69  			// handles proposals, block parts, votes
    70  			// may generate internal events (votes, complete proposals, 2/3 majorities)
    71  			if cs.handleMsg(mi) {
    72  				cs.wal.Write(mi)
    73  			}
    74  		case mi = <-cs.internalMsgQueue:
    75  			err := cs.wal.WriteSync(mi) // NOTE: fsync
    76  			if err != nil {
    77  				panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err))
    78  			}
    79  
    80  			if _, ok := mi.Msg.(*VoteMessage); ok {
    81  				// we actually want to simulate failing during
    82  				// the previous WriteSync, but this isn't easy to do.
    83  				// Equivalent would be to fail here and manually remove
    84  				// some bytes from the end of the wal.
    85  				fail.Fail() // XXX
    86  			}
    87  
    88  			// handles proposals, block parts, votes
    89  			cs.handleMsg(mi)
    90  		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
    91  			cs.wal.Write(ti)
    92  			// if the timeout is relevant to the rs
    93  			// go to the next step
    94  			cs.handleTimeout(ti, rs)
    95  		case <-cs.Quit():
    96  			onExit(cs)
    97  			return
    98  		}
    99  	}
   100  }
   101  
   102  func (cs *State) handleAVCProposal(proposal *types.Proposal) {
   103  	if !GetActiveVC() ||
   104  		cs.Height != proposal.Height || cs.Round != proposal.Round ||
   105  		len(cs.taskResultChan) == 0 {
   106  		return
   107  	}
   108  	res := cs.getPreBlockResult(proposal.Height)
   109  	if res == nil {
   110  		cs.Logger.Error("handleAVCProposal get block nil", "cs height", cs.Height, "proposal height", proposal.Height)
   111  		return
   112  	}
   113  	if !bytes.Equal(proposal.BlockID.PartsHeader.Hash, res.blockParts.Header().Hash) || proposal.Height != res.block.Height {
   114  		return
   115  	}
   116  	cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""})
   117  	for i := 0; i < res.blockParts.Total(); i++ {
   118  		part := res.blockParts.GetPart(i)
   119  		cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""})
   120  	}
   121  }
   122  
   123  // state transitions on complete-proposal, 2/3-any, 2/3-one
   124  func (cs *State) handleMsg(mi msgInfo) (added bool) {
   125  	cs.mtx.Lock()
   126  	defer cs.mtx.Unlock()
   127  
   128  	var (
   129  		err error
   130  	)
   131  	msg, peerID := mi.Msg, mi.PeerID
   132  	switch msg := msg.(type) {
   133  	case *ProposeResponseMessage:
   134  		cs.handleAVCProposal(msg.Proposal)
   135  
   136  	case *ViewChangeMessage:
   137  		if !GetActiveVC() {
   138  			return
   139  		}
   140  
   141  		// no need to handle duplicate vcMsg
   142  		if cs.vcMsg != nil && cs.vcMsg.Height >= msg.Height {
   143  			return
   144  		}
   145  
   146  		// enterNewHeight use cs.vcMsg
   147  		if msg.Height == cs.Height+1 {
   148  			cs.vcMsg = msg
   149  			cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg)
   150  		} else if msg.Height == cs.Height {
   151  			// ApplyBlock of height-1 has finished
   152  			// at this height, it has enterNewHeight
   153  			// vc immediately
   154  			cs.vcMsg = msg
   155  			cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg)
   156  			if cs.Step != cstypes.RoundStepNewHeight && cs.Round == 0 {
   157  				_, val := cs.Validators.GetByAddress(msg.NewProposer)
   158  				cs.enterNewRoundAVC(cs.Height, 0, val)
   159  			}
   160  		}
   161  
   162  	case *ProposalMessage:
   163  		// will not cause transition.
   164  		// once proposal is set, we can receive block parts
   165  		if added, err = cs.setProposal(msg.Proposal); added {
   166  			cs.handleAVCProposal(msg.Proposal)
   167  		}
   168  	case *BlockPartMessage:
   169  		// if avc and has 2/3 votes, it can use the blockPartsHeader from votes
   170  		if cs.HasVC && cs.ProposalBlockParts == nil && cs.Round == 0 {
   171  			prevotes := cs.Votes.Prevotes(cs.Round)
   172  			blockID, hasTwoThirds := prevotes.TwoThirdsMajority()
   173  			if hasTwoThirds && !blockID.IsZero() {
   174  				cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartsHeader)
   175  			}
   176  		}
   177  		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
   178  		added, err = cs.addProposalBlockPart(msg, peerID)
   179  
   180  		// We unlock here to yield to any routines that need to read the the RoundState.
   181  		// Previously, this code held the lock from the point at which the final block
   182  		// part was received until the block executed against the application.
   183  		// This prevented the reactor from being able to retrieve the most updated
   184  		// version of the RoundState. The reactor needs the updated RoundState to
   185  		// gossip the now completed block.
   186  		//
   187  		// This code can be further improved by either always operating on a copy
   188  		// of RoundState and only locking when switching out State's copy of
   189  		// RoundState with the updated copy or by emitting RoundState events in
   190  		// more places for routines depending on it to listen for.
   191  
   192  		cs.mtx.Unlock()
   193  		cs.mtx.Lock()
   194  		if added && cs.ProposalBlockParts.IsComplete() {
   195  			cs.handleCompleteProposal(msg.Height)
   196  		}
   197  
   198  		if added {
   199  			cs.statsMsgQueue <- mi
   200  		}
   201  
   202  		if err != nil && msg.Round != cs.Round {
   203  			cs.Logger.Debug(
   204  				"Received block part from wrong round",
   205  				"height",
   206  				cs.Height,
   207  				"csRound",
   208  				cs.Round,
   209  				"blockRound",
   210  				msg.Round)
   211  			err = nil
   212  		}
   213  	case *VoteMessage:
   214  		// attempt to add the vote and dupeout the validator if its a duplicate signature
   215  		// if the vote gives us a 2/3-any or 2/3-one, we transition
   216  		added, err = cs.tryAddVote(msg.Vote, peerID)
   217  		if added {
   218  			cs.statsMsgQueue <- mi
   219  		}
   220  
   221  		// if err == ErrAddingVote {
   222  		// TODO: punish peer
   223  		// We probably don't want to stop the peer here. The vote does not
   224  		// necessarily comes from a malicious peer but can be just broadcasted by
   225  		// a typical peer.
   226  		// https://github.com/tendermint/tendermint/issues/1281
   227  		// }
   228  
   229  		// NOTE: the vote is broadcast to peers by the reactor listening
   230  		// for vote events
   231  
   232  		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
   233  		// the peer is sending us CatchupCommit precommits.
   234  		// We could make note of this and help filter in broadcastHasVoteMessage().
   235  	default:
   236  		cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg))
   237  		return
   238  	}
   239  
   240  	if err != nil { // nolint:staticcheck
   241  		// Causes TestReactorValidatorSetChanges to timeout
   242  		// https://github.com/tendermint/tendermint/issues/3406
   243  		// cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round,
   244  		// 	"peer", peerID, "err", err, "msg", msg)
   245  	}
   246  	return
   247  }
   248  
   249  func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
   250  	cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)
   251  
   252  	// timeouts must be for current height, round, step
   253  	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
   254  		cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
   255  		return
   256  	}
   257  
   258  	// the timeout will now cause a state transition
   259  	cs.mtx.Lock()
   260  	defer cs.mtx.Unlock()
   261  
   262  	switch ti.Step {
   263  	case cstypes.RoundStepNewHeight:
   264  		// NewRound event fired from enterNewRound.
   265  		// XXX: should we fire timeout here (for timeout commit)?
   266  		cs.enterNewHeight(ti.Height)
   267  	case cstypes.RoundStepNewRound:
   268  		cs.enterPropose(ti.Height, 0)
   269  	case cstypes.RoundStepPropose:
   270  		cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent())
   271  		cs.enterPrevote(ti.Height, ti.Round)
   272  	case cstypes.RoundStepPrevoteWait:
   273  		cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent())
   274  		cs.enterPrecommit(ti.Height, ti.Round)
   275  	case cstypes.RoundStepPrecommitWait:
   276  		cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent())
   277  		cs.enterPrecommit(ti.Height, ti.Round)
   278  		cs.enterNewRound(ti.Height, ti.Round+1)
   279  	default:
   280  		panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step))
   281  	}
   282  
   283  }
   284  
   285  // enterNewRound(height, 0) at cs.StartTime.
   286  func (cs *State) scheduleRound0(rs *cstypes.RoundState) {
   287  	overDuration := tmtime.Now().Sub(cs.StartTime)
   288  	if overDuration < 0 {
   289  		overDuration = 0
   290  	}
   291  	sleepDuration := cfg.DynamicConfig.GetCsTimeoutCommit() - overDuration
   292  	if sleepDuration < 0 {
   293  		sleepDuration = 0
   294  	}
   295  
   296  	if !cs.config.Waiting {
   297  		sleepDuration = 0
   298  	}
   299  
   300  	if GetActiveVC() && cs.privValidator != nil {
   301  		select {
   302  		case cs.preBlockTaskChan <- &preBlockTask{cs.Height, sleepDuration}:
   303  		default:
   304  		}
   305  
   306  	}
   307  
   308  	cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight)
   309  }
   310  
   311  // requestForProposer FireEvent to broadcast ProposeRequestMessage
   312  func (cs *State) requestForProposer(prMsg ProposeRequestMessage) {
   313  	if signature, err := cs.privValidator.SignBytes(prMsg.SignBytes()); err == nil {
   314  		prMsg.Signature = signature
   315  		cs.evsw.FireEvent(types.EventProposeRequest, &prMsg)
   316  	} else {
   317  		cs.Logger.Error("requestForProposer", "err", err)
   318  	}
   319  }
   320  
   321  // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan)
   322  func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int, step cstypes.RoundStepType) {
   323  	cs.timeoutTicker.ScheduleTimeout(timeoutInfo{Duration: duration, Height: height, Round: round, Step: step})
   324  }
   325  
   326  // send a msg into the receiveRoutine regarding our own proposal, block part, or vote
   327  func (cs *State) sendInternalMessage(mi msgInfo) {
   328  	select {
   329  	case cs.internalMsgQueue <- mi:
   330  	default:
   331  		// NOTE: using the go-routine means our votes can
   332  		// be processed out of order.
   333  		// TODO: use CList here for strict determinism and
   334  		// attempt push to internalMsgQueue in receiveRoutine
   335  		cs.Logger.Info("Internal msg queue is full. Using a go-routine")
   336  		go func() { cs.internalMsgQueue <- mi }()
   337  	}
   338  }
   339  
   340  func (cs *State) handleTxsAvailable() {
   341  	cs.mtx.Lock()
   342  	defer cs.mtx.Unlock()
   343  
   344  	// We only need to do this for round 0.
   345  	if cs.Round != 0 {
   346  		return
   347  	}
   348  
   349  	switch cs.Step {
   350  	case cstypes.RoundStepNewHeight: // timeoutCommit phase
   351  		if cs.needProofBlock(cs.Height) {
   352  			// enterPropose will be called by enterNewRound
   353  			return
   354  		}
   355  
   356  		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
   357  		timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
   358  		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)
   359  	case cstypes.RoundStepNewRound: // after timeoutCommit
   360  		cs.enterPropose(cs.Height, 0)
   361  	}
   362  }
   363  
   364  func (cs *State) preMakeBlockRoutine() {
   365  	for {
   366  		select {
   367  		case task := <-cs.preBlockTaskChan:
   368  			if task.height == cs.Height {
   369  				cs.preMakeBlock(task.height, task.duration)
   370  			}
   371  		case <-cs.Quit():
   372  			return
   373  		}
   374  	}
   375  }