github.com/okex/exchain@v1.8.0/libs/tendermint/consensus/consensus_main_routine.go (about)

     1  package consensus
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	cfg "github.com/okex/exchain/libs/tendermint/config"
     7  	cstypes "github.com/okex/exchain/libs/tendermint/consensus/types"
     8  	"github.com/okex/exchain/libs/tendermint/libs/fail"
     9  	"github.com/okex/exchain/libs/tendermint/types"
    10  	tmtime "github.com/okex/exchain/libs/tendermint/types/time"
    11  	"reflect"
    12  	"runtime/debug"
    13  	"time"
    14  )
    15  
    16  //-----------------------------------------
    17  // the main go routines
    18  
    19  // receiveRoutine handles messages which may cause state transitions.
    20  // it's argument (n) is the number of messages to process before exiting - use 0 to run forever
    21  // It keeps the RoundState and is the only thing that updates it.
    22  // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
    23  // State must be locked before any internal state is updated.
    24  func (cs *State) receiveRoutine(maxSteps int) {
    25  	onExit := func(cs *State) {
    26  		// NOTE: the internalMsgQueue may have signed messages from our
    27  		// priv_val that haven't hit the WAL, but its ok because
    28  		// priv_val tracks LastSig
    29  
    30  		// close wal now that we're done writing to it
    31  		cs.wal.Stop()
    32  		cs.wal.Wait()
    33  
    34  		close(cs.done)
    35  		cs.done = nil
    36  	}
    37  
    38  	defer func() {
    39  		if r := recover(); r != nil {
    40  			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
    41  			// stop gracefully
    42  			//
    43  			// NOTE: We most probably shouldn't be running any further when there is
    44  			// some unexpected panic. Some unknown error happened, and so we don't
    45  			// know if that will result in the validator signing an invalid thing. It
    46  			// might be worthwhile to explore a mechanism for manual resuming via
    47  			// some console or secure RPC system, but for now, halting the chain upon
    48  			// unexpected consensus bugs sounds like the better option.
    49  			onExit(cs)
    50  		}
    51  	}()
    52  
    53  	for {
    54  		if maxSteps > 0 {
    55  			if cs.nSteps >= maxSteps {
    56  				cs.Logger.Info("reached max steps. exiting receive routine")
    57  				cs.nSteps = 0
    58  				return
    59  			}
    60  		}
    61  		rs := cs.RoundState
    62  		var mi msgInfo
    63  
    64  		select {
    65  		case <-cs.txNotifier.TxsAvailable():
    66  			cs.handleTxsAvailable()
    67  		case mi = <-cs.peerMsgQueue:
    68  			// handles proposals, block parts, votes
    69  			// may generate internal events (votes, complete proposals, 2/3 majorities)
    70  			if cs.handleMsg(mi) {
    71  				cs.wal.Write(mi)
    72  			}
    73  		case mi = <-cs.internalMsgQueue:
    74  			err := cs.wal.WriteSync(mi) // NOTE: fsync
    75  			if err != nil {
    76  				panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err))
    77  			}
    78  
    79  			if _, ok := mi.Msg.(*VoteMessage); ok {
    80  				// we actually want to simulate failing during
    81  				// the previous WriteSync, but this isn't easy to do.
    82  				// Equivalent would be to fail here and manually remove
    83  				// some bytes from the end of the wal.
    84  				fail.Fail() // XXX
    85  			}
    86  
    87  			// handles proposals, block parts, votes
    88  			cs.handleMsg(mi)
    89  		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
    90  			cs.wal.Write(ti)
    91  			// if the timeout is relevant to the rs
    92  			// go to the next step
    93  			cs.handleTimeout(ti, rs)
    94  		case <-cs.Quit():
    95  			onExit(cs)
    96  			return
    97  		}
    98  	}
    99  }
   100  
   101  func (cs *State) handleAVCProposal(proposal *types.Proposal) {
   102  	if !GetActiveVC() ||
   103  		cs.Height != proposal.Height || cs.Round != proposal.Round ||
   104  		len(cs.taskResultChan) == 0 {
   105  		return
   106  	}
   107  	res := cs.getPreBlockResult(proposal.Height)
   108  	if res == nil {
   109  		cs.Logger.Error("handleAVCProposal get block nil", "cs height", cs.Height, "proposal height", proposal.Height)
   110  		return
   111  	}
   112  	if !bytes.Equal(proposal.BlockID.PartsHeader.Hash, res.blockParts.Header().Hash) || proposal.Height != res.block.Height {
   113  		return
   114  	}
   115  	cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""})
   116  	for i := 0; i < res.blockParts.Total(); i++ {
   117  		part := res.blockParts.GetPart(i)
   118  		cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""})
   119  	}
   120  }
   121  
   122  // state transitions on complete-proposal, 2/3-any, 2/3-one
   123  func (cs *State) handleMsg(mi msgInfo) (added bool) {
   124  	cs.mtx.Lock()
   125  	defer cs.mtx.Unlock()
   126  
   127  	var (
   128  		err error
   129  	)
   130  	msg, peerID := mi.Msg, mi.PeerID
   131  	switch msg := msg.(type) {
   132  	case *ProposeResponseMessage:
   133  		cs.handleAVCProposal(msg.Proposal)
   134  
   135  	case *ViewChangeMessage:
   136  		if !GetActiveVC() {
   137  			return
   138  		}
   139  
   140  		// no need to handle duplicate vcMsg
   141  		if cs.vcMsg != nil && cs.vcMsg.Height >= msg.Height {
   142  			return
   143  		}
   144  
   145  		// enterNewHeight use cs.vcMsg
   146  		if msg.Height == cs.Height+1 {
   147  			cs.vcMsg = msg
   148  			cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg)
   149  		} else if msg.Height == cs.Height {
   150  			// ApplyBlock of height-1 has finished
   151  			// at this height, it has enterNewHeight
   152  			// vc immediately
   153  			cs.vcMsg = msg
   154  			cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg)
   155  			if cs.Step != cstypes.RoundStepNewHeight && cs.Round == 0 {
   156  				_, val := cs.Validators.GetByAddress(msg.NewProposer)
   157  				cs.enterNewRoundAVC(cs.Height, 0, val)
   158  			}
   159  		}
   160  
   161  	case *ProposalMessage:
   162  		// will not cause transition.
   163  		// once proposal is set, we can receive block parts
   164  		if added, err = cs.setProposal(msg.Proposal); added {
   165  			cs.handleAVCProposal(msg.Proposal)
   166  		}
   167  	case *BlockPartMessage:
   168  		// if avc and has 2/3 votes, it can use the blockPartsHeader from votes
   169  		if cs.HasVC && cs.ProposalBlockParts == nil && cs.Round == 0 {
   170  			prevotes := cs.Votes.Prevotes(cs.Round)
   171  			blockID, hasTwoThirds := prevotes.TwoThirdsMajority()
   172  			if hasTwoThirds && !blockID.IsZero() {
   173  				cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartsHeader)
   174  			}
   175  		}
   176  		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
   177  		added, err = cs.addProposalBlockPart(msg, peerID)
   178  
   179  		// We unlock here to yield to any routines that need to read the the RoundState.
   180  		// Previously, this code held the lock from the point at which the final block
   181  		// part was received until the block executed against the application.
   182  		// This prevented the reactor from being able to retrieve the most updated
   183  		// version of the RoundState. The reactor needs the updated RoundState to
   184  		// gossip the now completed block.
   185  		//
   186  		// This code can be further improved by either always operating on a copy
   187  		// of RoundState and only locking when switching out State's copy of
   188  		// RoundState with the updated copy or by emitting RoundState events in
   189  		// more places for routines depending on it to listen for.
   190  
   191  		cs.mtx.Unlock()
   192  		cs.mtx.Lock()
   193  		if added && cs.ProposalBlockParts.IsComplete() {
   194  			cs.handleCompleteProposal(msg.Height)
   195  		}
   196  
   197  		if added {
   198  			cs.statsMsgQueue <- mi
   199  		}
   200  
   201  		if err != nil && msg.Round != cs.Round {
   202  			cs.Logger.Debug(
   203  				"Received block part from wrong round",
   204  				"height",
   205  				cs.Height,
   206  				"csRound",
   207  				cs.Round,
   208  				"blockRound",
   209  				msg.Round)
   210  			err = nil
   211  		}
   212  	case *VoteMessage:
   213  		// attempt to add the vote and dupeout the validator if its a duplicate signature
   214  		// if the vote gives us a 2/3-any or 2/3-one, we transition
   215  		added, err = cs.tryAddVote(msg.Vote, peerID)
   216  		if added {
   217  			cs.statsMsgQueue <- mi
   218  		}
   219  
   220  		// if err == ErrAddingVote {
   221  		// TODO: punish peer
   222  		// We probably don't want to stop the peer here. The vote does not
   223  		// necessarily comes from a malicious peer but can be just broadcasted by
   224  		// a typical peer.
   225  		// https://github.com/tendermint/tendermint/issues/1281
   226  		// }
   227  
   228  		// NOTE: the vote is broadcast to peers by the reactor listening
   229  		// for vote events
   230  
   231  		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
   232  		// the peer is sending us CatchupCommit precommits.
   233  		// We could make note of this and help filter in broadcastHasVoteMessage().
   234  	default:
   235  		cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg))
   236  		return
   237  	}
   238  
   239  	if err != nil { // nolint:staticcheck
   240  		// Causes TestReactorValidatorSetChanges to timeout
   241  		// https://github.com/tendermint/tendermint/issues/3406
   242  		// cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round,
   243  		// 	"peer", peerID, "err", err, "msg", msg)
   244  	}
   245  	return
   246  }
   247  
   248  func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
   249  	cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)
   250  
   251  	// timeouts must be for current height, round, step
   252  	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
   253  		cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
   254  		return
   255  	}
   256  
   257  	// the timeout will now cause a state transition
   258  	cs.mtx.Lock()
   259  	defer cs.mtx.Unlock()
   260  
   261  	switch ti.Step {
   262  	case cstypes.RoundStepNewHeight:
   263  		// NewRound event fired from enterNewRound.
   264  		// XXX: should we fire timeout here (for timeout commit)?
   265  		cs.enterNewHeight(ti.Height)
   266  	case cstypes.RoundStepNewRound:
   267  		cs.enterPropose(ti.Height, 0)
   268  	case cstypes.RoundStepPropose:
   269  		cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent())
   270  		cs.enterPrevote(ti.Height, ti.Round)
   271  	case cstypes.RoundStepPrevoteWait:
   272  		cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent())
   273  		cs.enterPrecommit(ti.Height, ti.Round)
   274  	case cstypes.RoundStepPrecommitWait:
   275  		cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent())
   276  		cs.enterPrecommit(ti.Height, ti.Round)
   277  		cs.enterNewRound(ti.Height, ti.Round+1)
   278  	default:
   279  		panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step))
   280  	}
   281  
   282  }
   283  
   284  // enterNewRound(height, 0) at cs.StartTime.
   285  func (cs *State) scheduleRound0(rs *cstypes.RoundState) {
   286  	overDuration := tmtime.Now().Sub(cs.StartTime)
   287  	if overDuration < 0 {
   288  		overDuration = 0
   289  	}
   290  	sleepDuration := cfg.DynamicConfig.GetCsTimeoutCommit() - overDuration
   291  	if sleepDuration < 0 {
   292  		sleepDuration = 0
   293  	}
   294  
   295  	if !cs.config.Waiting {
   296  		sleepDuration = 0
   297  	}
   298  
   299  	if GetActiveVC() && cs.privValidator != nil {
   300  		select {
   301  		case cs.preBlockTaskChan <- &preBlockTask{cs.Height, sleepDuration}:
   302  		default:
   303  		}
   304  
   305  	}
   306  
   307  	cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight)
   308  }
   309  
   310  // requestForProposer FireEvent to broadcast ProposeRequestMessage
   311  func (cs *State) requestForProposer(prMsg ProposeRequestMessage) {
   312  	if signature, err := cs.privValidator.SignBytes(prMsg.SignBytes()); err == nil {
   313  		prMsg.Signature = signature
   314  		cs.evsw.FireEvent(types.EventProposeRequest, &prMsg)
   315  	} else {
   316  		cs.Logger.Error("requestForProposer", "err", err)
   317  	}
   318  }
   319  
   320  // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan)
   321  func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int, step cstypes.RoundStepType) {
   322  	cs.timeoutTicker.ScheduleTimeout(timeoutInfo{Duration: duration, Height: height, Round: round, Step: step})
   323  }
   324  
   325  // send a msg into the receiveRoutine regarding our own proposal, block part, or vote
   326  func (cs *State) sendInternalMessage(mi msgInfo) {
   327  	select {
   328  	case cs.internalMsgQueue <- mi:
   329  	default:
   330  		// NOTE: using the go-routine means our votes can
   331  		// be processed out of order.
   332  		// TODO: use CList here for strict determinism and
   333  		// attempt push to internalMsgQueue in receiveRoutine
   334  		cs.Logger.Info("Internal msg queue is full. Using a go-routine")
   335  		go func() { cs.internalMsgQueue <- mi }()
   336  	}
   337  }
   338  
   339  func (cs *State) handleTxsAvailable() {
   340  	cs.mtx.Lock()
   341  	defer cs.mtx.Unlock()
   342  
   343  	// We only need to do this for round 0.
   344  	if cs.Round != 0 {
   345  		return
   346  	}
   347  
   348  	switch cs.Step {
   349  	case cstypes.RoundStepNewHeight: // timeoutCommit phase
   350  		if cs.needProofBlock(cs.Height) {
   351  			// enterPropose will be called by enterNewRound
   352  			return
   353  		}
   354  
   355  		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
   356  		timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
   357  		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)
   358  	case cstypes.RoundStepNewRound: // after timeoutCommit
   359  		cs.enterPropose(cs.Height, 0)
   360  	}
   361  }
   362  
   363  func (cs *State) preMakeBlockRoutine() {
   364  	for {
   365  		select {
   366  		case task := <-cs.preBlockTaskChan:
   367  			if task.height == cs.Height {
   368  				cs.preMakeBlock(task.height, task.duration)
   369  			}
   370  		case <-cs.Quit():
   371  			return
   372  		}
   373  	}
   374  }