github.com/vipernet-xyz/tm@v0.34.24/test/maverick/consensus/state.go (about)

     1  package consensus
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"reflect"
    10  	"runtime/debug"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/gogo/protobuf/proto"
    15  
    16  	cfg "github.com/vipernet-xyz/tm/config"
    17  	tmcon "github.com/vipernet-xyz/tm/consensus"
    18  	cstypes "github.com/vipernet-xyz/tm/consensus/types"
    19  	"github.com/vipernet-xyz/tm/crypto"
    20  	tmevents "github.com/vipernet-xyz/tm/libs/events"
    21  	"github.com/vipernet-xyz/tm/libs/fail"
    22  	tmjson "github.com/vipernet-xyz/tm/libs/json"
    23  	"github.com/vipernet-xyz/tm/libs/log"
    24  	tmmath "github.com/vipernet-xyz/tm/libs/math"
    25  	tmos "github.com/vipernet-xyz/tm/libs/os"
    26  	"github.com/vipernet-xyz/tm/libs/service"
    27  	"github.com/vipernet-xyz/tm/p2p"
    28  	tmproto "github.com/vipernet-xyz/tm/proto/tendermint/types"
    29  	sm "github.com/vipernet-xyz/tm/state"
    30  	"github.com/vipernet-xyz/tm/types"
    31  	tmtime "github.com/vipernet-xyz/tm/types/time"
    32  )
    33  
// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
//
// This variant additionally carries a per-height table of misbehaviors and a
// reference to the p2p switch so that those misbehaviors can act on peers.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state; mtx guards the embedded RoundState and state
	mtx sync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by the reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          tmcon.WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *tmcon.Metrics

	// misbehaviors mapped for each height (can't have more than one misbehavior per height)
	misbehaviors map[int64]Misbehavior

	// the switch is passed to the state so that maverick misbehaviors can directly control which
	// information they send to which nodes
	sw *p2p.Switch
}
   109  
// StateOption sets an optional parameter on the State.
// Options are applied by NewState after the State has been fully constructed.
type StateOption func(*State)
   112  
// NewState returns a new State.
//
// The three message queues are buffered with msgQueueSize entries, the WAL
// defaults to nilWAL (replaced in OnStart unless one was set beforehand), and
// metrics default to a no-op implementation. misbehaviors maps a height to the
// Misbehavior whose hooks replace the default handlers at that height.
// The private validator, event bus and switch are not set here; use the
// corresponding setters. options are applied last, in the order given.
func NewState(
	config *cfg.ConsensusConfig,
	state sm.State,
	blockExec *sm.BlockExecutor,
	blockStore sm.BlockStore,
	txNotifier txNotifier,
	evpool evidencePool,
	misbehaviors map[int64]Misbehavior,
	options ...StateOption,
) *State {
	cs := &State{
		config:           config,
		blockExec:        blockExec,
		blockStore:       blockStore,
		txNotifier:       txNotifier,
		peerMsgQueue:     make(chan msgInfo, msgQueueSize),
		internalMsgQueue: make(chan msgInfo, msgQueueSize),
		timeoutTicker:    NewTimeoutTicker(),
		statsMsgQueue:    make(chan msgInfo, msgQueueSize),
		done:             make(chan struct{}),
		doWALCatchup:     true,
		wal:              nilWAL{},
		evpool:           evpool,
		evsw:             tmevents.NewEventSwitch(),
		metrics:          tmcon.NopMetrics(),
		misbehaviors:     misbehaviors,
	}
	// set function defaults (may be overwritten before calling Start)
	cs.decideProposal = cs.defaultDecideProposal

	// We have no votes, so reconstruct LastCommit from SeenCommit.
	// This panics if the seen commit is missing or lacks a +2/3 majority.
	if state.LastBlockHeight > 0 {
		cs.reconstructLastCommit(state)
	}

	cs.updateToState(state)

	// Don't call scheduleRound0 yet.
	// We do that upon Start().

	cs.BaseService = *service.NewBaseService(nil, "State", cs)
	for _, option := range options {
		option(cs)
	}
	return cs
}
   160  
// SetSwitch stores the p2p switch on the State.
// The maverick consensus state needs access to the peers so that misbehaviors
// can decide what to send to whom. The assignment is not guarded by cs.mtx.
func (cs *State) SetSwitch(sw *p2p.Switch) {
	cs.sw = sw
}
   165  
// handleMsg processes a single message from a peer or from ourselves while
// holding the write lock on cs.mtx.
// State transitions happen on complete-proposal, 2/3-any, 2/3-one.
//
// Proposals are routed to the misbehavior registered for the current height,
// or to defaultReceiveProposal when there is none. Block parts and votes that
// were actually added are forwarded on statsMsgQueue. Messages of an unknown
// type are logged and dropped; any other error is logged but not returned.
func (cs *State) handleMsg(mi msgInfo) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	var (
		added bool
		err   error
	)
	msg, peerID := mi.Msg, mi.PeerID
	switch msg := msg.(type) {
	case *tmcon.ProposalMessage:
		// will not cause transition.
		// once proposal is set, we can receive block parts
		// err = cs.setProposal(msg.Proposal)
		if b, ok := cs.misbehaviors[cs.Height]; ok {
			err = b.ReceiveProposal(cs, msg.Proposal)
		} else {
			err = defaultReceiveProposal(cs, msg.Proposal)
		}
	case *tmcon.BlockPartMessage:
		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
		added, err = cs.addProposalBlockPart(msg, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// An error for a part that belongs to a different round is only
		// logged at debug level and then cleared, so it is not reported below.
		if err != nil && msg.Round != cs.Round {
			cs.Logger.Debug(
				"Received block part from wrong round",
				"height",
				cs.Height,
				"csRound",
				cs.Round,
				"blockRound",
				msg.Round)
			err = nil
		}
	case *tmcon.VoteMessage:
		// attempt to add the vote and dupeout the validator if its a duplicate signature
		// if the vote gives us a 2/3-any or 2/3-one, we transition
		added, err = cs.tryAddVote(msg.Vote, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// if err == ErrAddingVote {
		// TODO: punish peer
		// We probably don't want to stop the peer here. The vote does not
		// necessarily comes from a malicious peer but can be just broadcasted by
		// a typical peer.
		// https://github.com/vipernet-xyz/tm/issues/1281
		// }

		// NOTE: the vote is broadcast to peers by the reactor listening
		// for vote events

		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
		// the peer is sending us CatchupCommit precommits.
		// We could make note of this and help filter in broadcastHasVoteMessage().
	default:
		cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg))
		return
	}

	if err != nil {
		cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round,
			"peer", peerID, "err", err, "msg", msg)
	}
}
   236  
// enterPropose moves the state machine into the Propose step of (height, round).
//
// Enter (CreateEmptyBlocks): from enterNewRound(height,round)
// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ):
//
//	after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval
//
// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool
//
// The call is a no-op (logged at debug level) when height does not match the
// current height, when round is behind the current round, or when the current
// round is already at or past the Propose step. The actual proposing logic is
// delegated to the misbehavior registered for the current height, or to
// defaultEnterPropose when there is none.
func (cs *State) enterPropose(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) {
		logger.Debug("enter propose", "msg", log.NewLazySprintf(
			"enterPropose(%v/%v): Invalid args. Current step: %v/%v/%v",
			height,
			round,
			cs.Height,
			cs.Round,
			cs.Step))
		return
	}
	logger.Info("enter propose",
		"msg",
		log.NewLazySprintf("enterPropose(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))

	// Runs after the propose handler below has returned.
	defer func() {
		// Done enterPropose:
		cs.updateRoundStep(round, cstypes.RoundStepPropose)
		cs.newStep()

		// If we have the whole proposal + POL, then goto Prevote now.
		// else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart),
		// or else after timeoutPropose
		if cs.isProposalComplete() {
			cs.enterPrevote(height, cs.Round)
		}
	}()

	if b, ok := cs.misbehaviors[cs.Height]; ok {
		b.EnterPropose(cs, height, round)
	} else {
		defaultEnterPropose(cs, height, round)
	}
}
   279  
// enterPrevote moves the state machine into the Prevote step of (height, round).
//
// Enter: `timeoutPropose` after entering Propose.
// Enter: proposal block and POL is ready.
// Prevote for LockedBlock if we're locked, or ProposalBlock if valid.
// Otherwise vote nil.
//
// The call is a no-op (logged at debug level) when height does not match the
// current height, when round is behind the current round, or when the current
// round is already at or past the Prevote step. Signing and broadcasting is
// delegated to the misbehavior registered for the current height, or to
// defaultEnterPrevote when there is none.
func (cs *State) enterPrevote(height int64, round int32) {
	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) {
		cs.Logger.Debug("enter prevote", "msg", log.NewLazySprintf(
			"enterPrevote(%v/%v): Invalid args. Current step: %v/%v/%v",
			height,
			round,
			cs.Height,
			cs.Round,
			cs.Step))
		return
	}

	// Runs after the prevote handler below has returned.
	defer func() {
		// Done enterPrevote:
		cs.updateRoundStep(round, cstypes.RoundStepPrevote)
		cs.newStep()
	}()

	cs.Logger.Debug("enter prevote",
		"msg",
		log.NewLazySprintf("enterPrevote(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))

	// Sign and broadcast vote as necessary
	if b, ok := cs.misbehaviors[cs.Height]; ok {
		b.EnterPrevote(cs, height, round)
	} else {
		defaultEnterPrevote(cs, height, round)
	}

	// Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait
	// (so we have more time to try and collect +2/3 prevotes for a single block)
}
   316  
// enterPrecommit moves the state machine into the Precommit step of (height, round).
//
// Enter: `timeoutPrevote` after any +2/3 prevotes.
// Enter: `timeoutPrecommit` after any +2/3 precommits.
// Enter: +2/3 precommits for block or nil.
// Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round)
// else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil,
// else, precommit nil otherwise.
//
// The call is a no-op (logged at debug level) when height does not match the
// current height, when round is behind the current round, or when the current
// round is already at or past the Precommit step. The precommit logic is
// delegated to the misbehavior registered for the current height, or to
// defaultEnterPrecommit when there is none.
func (cs *State) enterPrecommit(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) {
		logger.Debug("enter precommit",
			"msg",
			log.NewLazySprintf("enterPrecommit(%v/%v): Invalid args. Current step: %v/%v/%v",
				height,
				round,
				cs.Height,
				cs.Round,
				cs.Step))
		return
	}

	logger.Info("enter precommit",
		"msg",
		log.NewLazySprintf("enterPrecommit(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))

	// Runs after the precommit handler below has returned.
	defer func() {
		// Done enterPrecommit:
		cs.updateRoundStep(round, cstypes.RoundStepPrecommit)
		cs.newStep()
	}()

	if b, ok := cs.misbehaviors[cs.Height]; ok {
		b.EnterPrecommit(cs, height, round)
	} else {
		defaultEnterPrecommit(cs, height, round)
	}
}
   354  
   355  func (cs *State) addVote(
   356  	vote *types.Vote,
   357  	peerID p2p.ID,
   358  ) (added bool, err error) {
   359  	cs.Logger.Debug(
   360  		"addVote",
   361  		"voteHeight",
   362  		vote.Height,
   363  		"voteType",
   364  		vote.Type,
   365  		"valIndex",
   366  		vote.ValidatorIndex,
   367  		"csHeight",
   368  		cs.Height,
   369  	)
   370  
   371  	// A precommit for the previous height?
   372  	// These come in while we wait timeoutCommit
   373  	if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType {
   374  		if cs.Step != cstypes.RoundStepNewHeight {
   375  			// Late precommit at prior height is ignored
   376  			cs.Logger.Debug("Precommit vote came in after commit timeout and has been ignored", "vote", vote)
   377  			return
   378  		}
   379  		added, err = cs.LastCommit.AddVote(vote)
   380  		if !added {
   381  			return
   382  		}
   383  
   384  		cs.Logger.Info("add vote",
   385  			"msg",
   386  			log.NewLazySprintf("Added to lastPrecommits: %v", cs.LastCommit.StringShort()))
   387  		_ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote})
   388  		cs.evsw.FireEvent(types.EventVote, vote)
   389  
   390  		// if we can skip timeoutCommit and have all the votes now,
   391  		if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() {
   392  			// go straight to new round (skip timeout commit)
   393  			// cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight)
   394  			cs.enterNewRound(cs.Height, 0)
   395  		}
   396  
   397  		return
   398  	}
   399  
   400  	// Height mismatch is ignored.
   401  	// Not necessarily a bad peer, but not favourable behaviour.
   402  	if vote.Height != cs.Height {
   403  		cs.Logger.Debug("vote ignored and not added", "voteHeight", vote.Height, "csHeight", cs.Height, "peerID", peerID)
   404  		return
   405  	}
   406  
   407  	added, err = cs.Votes.AddVote(vote, peerID)
   408  	if !added {
   409  		// Either duplicate, or error upon cs.Votes.AddByIndex()
   410  		return
   411  	}
   412  
   413  	_ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote})
   414  	cs.evsw.FireEvent(types.EventVote, vote)
   415  
   416  	switch vote.Type {
   417  	case tmproto.PrevoteType:
   418  		if b, ok := cs.misbehaviors[cs.Height]; ok {
   419  			b.ReceivePrevote(cs, vote)
   420  		} else {
   421  			defaultReceivePrevote(cs, vote)
   422  		}
   423  
   424  	case tmproto.PrecommitType:
   425  		if b, ok := cs.misbehaviors[cs.Height]; ok {
   426  			b.ReceivePrecommit(cs, vote)
   427  		}
   428  		defaultReceivePrecommit(cs, vote)
   429  
   430  	default:
   431  		panic(fmt.Sprintf("Unexpected vote type %v", vote.Type))
   432  	}
   433  
   434  	return added, err
   435  }
   436  
   437  //-----------------------------------------------------------------------------
   438  // Errors
   439  
// Sentinel errors of the consensus state.
var (
	// ErrInvalidProposalSignature reports a proposal whose signature is invalid.
	ErrInvalidProposalSignature = errors.New("error invalid proposal signature")
	// ErrInvalidProposalPOLRound reports a proposal with an invalid POL round.
	ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round")
	// ErrAddingVote reports a failure while adding a vote.
	ErrAddingVote = errors.New("error adding vote")
	// ErrSignatureFoundInPastBlocks reports that a signature from the same key
	// was found in past blocks.
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	// errPubKeyIsNotSet reports that the private validator pubkey is not available.
	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)
   448  
   449  //-----------------------------------------------------------------------------
   450  
// msgQueueSize is the buffer size used for the peer, internal and stats
// message queues created in NewState.
var msgQueueSize = 1000
   452  
// msgInfo wraps a message from the reactor which may update the state.
// An empty PeerID marks a message that was generated internally.
type msgInfo struct {
	Msg    tmcon.Message `json:"msg"`
	PeerID p2p.ID        `json:"peer_key"`
}
   458  
// timeoutInfo is an internally generated message which may update the state.
// It describes a timeout of the given Duration for a Height/Round/Step.
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}
   466  
   467  func (ti *timeoutInfo) String() string {
   468  	return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
   469  }
   470  
// txNotifier is the interface to the mempool.
type txNotifier interface {
	// TxsAvailable returns the channel on which the availability of
	// transactions is signalled.
	TxsAvailable() <-chan struct{}
}
   475  
// evidencePool is the interface to the evidence pool.
type evidencePool interface {
	// ReportConflictingVotes reports conflicting votes to the evidence pool
	// to be processed into evidence.
	ReportConflictingVotes(voteA, voteB *types.Vote)
}
   481  
   482  //----------------------------------------
   483  // Public interface
   484  
   485  // SetLogger implements Service.
   486  func (cs *State) SetLogger(l log.Logger) {
   487  	cs.BaseService.Logger = l
   488  	cs.timeoutTicker.SetLogger(l)
   489  }
   490  
// SetEventBus sets the event bus on the State and on its block executor.
func (cs *State) SetEventBus(b *types.EventBus) {
	cs.eventBus = b
	cs.blockExec.SetEventBus(b)
}
   496  
   497  // StateMetrics sets the metrics.
   498  func StateMetrics(metrics *tmcon.Metrics) StateOption {
   499  	return func(cs *State) { cs.metrics = metrics }
   500  }
   501  
// String returns a fixed name for the consensus state.
// It deliberately reads no fields of cs.
func (cs *State) String() string {
	// better not to access shared variables
	return "ConsensusState"
}
   507  
   508  // GetState returns a copy of the chain state.
   509  func (cs *State) GetState() sm.State {
   510  	cs.mtx.RLock()
   511  	defer cs.mtx.RUnlock()
   512  	return cs.state.Copy()
   513  }
   514  
   515  // GetLastHeight returns the last height committed.
   516  // If there were no blocks, returns 0.
   517  func (cs *State) GetLastHeight() int64 {
   518  	cs.mtx.RLock()
   519  	defer cs.mtx.RUnlock()
   520  	return cs.RoundState.Height - 1
   521  }
   522  
   523  // GetRoundState returns a shallow copy of the internal consensus state.
   524  func (cs *State) GetRoundState() *cstypes.RoundState {
   525  	cs.mtx.RLock()
   526  	rs := cs.RoundState // copy
   527  	cs.mtx.RUnlock()
   528  	return &rs
   529  }
   530  
   531  // GetRoundStateJSON returns a json of RoundState.
   532  func (cs *State) GetRoundStateJSON() ([]byte, error) {
   533  	cs.mtx.RLock()
   534  	defer cs.mtx.RUnlock()
   535  	return tmjson.Marshal(cs.RoundState)
   536  }
   537  
   538  // GetRoundStateSimpleJSON returns a json of RoundStateSimple
   539  func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) {
   540  	cs.mtx.RLock()
   541  	defer cs.mtx.RUnlock()
   542  	return tmjson.Marshal(cs.RoundState.RoundStateSimple())
   543  }
   544  
   545  // GetValidators returns a copy of the current validators.
   546  func (cs *State) GetValidators() (int64, []*types.Validator) {
   547  	cs.mtx.RLock()
   548  	defer cs.mtx.RUnlock()
   549  	return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators
   550  }
   551  
   552  // SetPrivValidator sets the private validator account for signing votes. It
   553  // immediately requests pubkey and caches it.
   554  func (cs *State) SetPrivValidator(priv types.PrivValidator) {
   555  	cs.mtx.Lock()
   556  	defer cs.mtx.Unlock()
   557  
   558  	cs.privValidator = priv
   559  
   560  	if err := cs.updatePrivValidatorPubKey(); err != nil {
   561  		cs.Logger.Error("Can't get private validator pubkey", "err", err)
   562  	}
   563  }
   564  
   565  // SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing.
   566  func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) {
   567  	cs.mtx.Lock()
   568  	cs.timeoutTicker = timeoutTicker
   569  	cs.mtx.Unlock()
   570  }
   571  
   572  // LoadCommit loads the commit for a given height.
   573  func (cs *State) LoadCommit(height int64) *types.Commit {
   574  	cs.mtx.RLock()
   575  	defer cs.mtx.RUnlock()
   576  	if height == cs.blockStore.Height() {
   577  		return cs.blockStore.LoadSeenCommit(height)
   578  	}
   579  	return cs.blockStore.LoadBlockCommit(height)
   580  }
   581  
// OnStart loads the latest state via the WAL, and starts the timeout and
// receive routines.
//
// Steps, in order: open the WAL (unless one was set already), replay it to
// catch up (with a single repair attempt if it is corrupted), start the event
// switch and the timeout ticker, check for double-signing risk, launch the
// receive routine and schedule round 0. The first error from any of these
// steps is returned, except for non-corruption replay errors, which are only
// logged.
func (cs *State) OnStart() error {
	// We may set the WAL in testing before calling Start, so only OpenWAL if its
	// still the nilWAL.
	if _, ok := cs.wal.(nilWAL); ok {
		if err := cs.loadWalFile(); err != nil {
			return err
		}
	}

	// We may have lost some votes if the process crashed; reload from the
	// consensus log to catch up.
	if cs.doWALCatchup {
		repairAttempted := false
	LOOP:
		for {
			err := cs.catchupReplay(cs.Height)
			switch {
			case err == nil:
				break LOOP
			case !IsDataCorruptionError(err):
				cs.Logger.Error("Error on catchup replay. Proceeding to start State anyway", "err", err)
				break LOOP
			case repairAttempted:
				// Still corrupted after one repair: give up.
				return err
			}

			cs.Logger.Info("WAL file is corrupted. Attempting repair", "err", err)

			// 1) prep work
			if err := cs.wal.Stop(); err != nil {
				return err
			}
			repairAttempted = true

			// 2) backup original WAL file
			corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile())
			if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil {
				return err
			}
			cs.Logger.Info("Backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile)

			// 3) try to repair (WAL file will be overwritten!)
			if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil {
				cs.Logger.Error("Repair failed", "err", err)
				return err
			}
			cs.Logger.Info("Successful repair")

			// reload WAL file, then loop around to retry the replay
			if err := cs.loadWalFile(); err != nil {
				return err
			}
		}
	}

	if err := cs.evsw.Start(); err != nil {
		return err
	}

	// we need the timeoutRoutine for replay so
	// we don't block on the tick chan.
	// NOTE: we will get a build up of garbage go routines
	// firing on the tockChan until the receiveRoutine is started
	// to deal with them (by that point, at most one will be valid)
	if err := cs.timeoutTicker.Start(); err != nil {
		return err
	}

	// Double Signing Risk Reduction
	if err := cs.checkDoubleSigningRisk(cs.Height); err != nil {
		return err
	}

	// now start the receiveRoutine
	go cs.receiveRoutine(0)

	// schedule the first round!
	// use GetRoundState so we don't race the receiveRoutine for access
	cs.scheduleRound0(cs.GetRoundState())

	return nil
}
   666  
   667  // loadWalFile loads WAL data from file. It overwrites cs.wal.
   668  func (cs *State) loadWalFile() error {
   669  	wal, err := cs.OpenWAL(cs.config.WalFile())
   670  	if err != nil {
   671  		cs.Logger.Error("Error loading State wal", "err", err)
   672  		return err
   673  	}
   674  	cs.wal = wal
   675  	return nil
   676  }
   677  
   678  // OnStop implements service.Service.
   679  func (cs *State) OnStop() {
   680  	if err := cs.evsw.Stop(); err != nil {
   681  		cs.Logger.Error("error trying to stop eventSwitch", "error", err)
   682  	}
   683  	if err := cs.timeoutTicker.Stop(); err != nil {
   684  		cs.Logger.Error("error trying to stop timeoutTicket", "error", err)
   685  	}
   686  	// WAL is stopped in receiveRoutine.
   687  }
   688  
// Wait waits for the main routine to return by blocking until cs.done
// can be received from.
// NOTE: be sure to Stop() the event switch and drain
// any event channels or this may deadlock
func (cs *State) Wait() {
	<-cs.done
}
   695  
   696  // OpenWAL opens a file to log all consensus messages and timeouts for
   697  // deterministic accountability.
   698  func (cs *State) OpenWAL(walFile string) (tmcon.WAL, error) {
   699  	wal, err := NewWAL(walFile)
   700  	if err != nil {
   701  		cs.Logger.Error("Failed to open WAL", "file", walFile, "err", err)
   702  		return nil, err
   703  	}
   704  	wal.SetLogger(cs.Logger.With("wal", walFile))
   705  	if err := wal.Start(); err != nil {
   706  		cs.Logger.Error("Failed to start WAL", "err", err)
   707  		return nil, err
   708  	}
   709  	return wal, nil
   710  }
   711  
   712  //------------------------------------------------------------
   713  // Public interface for passing messages into the consensus state, possibly causing a state transition.
   714  // If peerID == "", the msg is considered internal.
   715  // Messages are added to the appropriate queue (peer or internal).
   716  // If the queue is full, the function may block.
   717  // TODO: should these return anything or let callers just use events?
   718  
   719  // AddVote inputs a vote.
   720  func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) {
   721  	if peerID == "" {
   722  		cs.internalMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, ""}
   723  	} else {
   724  		cs.peerMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, peerID}
   725  	}
   726  
   727  	// TODO: wait for event?!
   728  	return false, nil
   729  }
   730  
   731  // SetProposal inputs a proposal.
   732  func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error {
   733  	if peerID == "" {
   734  		cs.internalMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""}
   735  	} else {
   736  		cs.peerMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, peerID}
   737  	}
   738  
   739  	// TODO: wait for event?!
   740  	return nil
   741  }
   742  
   743  // AddProposalBlockPart inputs a part of the proposal block.
   744  func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error {
   745  	if peerID == "" {
   746  		cs.internalMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, ""}
   747  	} else {
   748  		cs.peerMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, peerID}
   749  	}
   750  
   751  	// TODO: wait for event?!
   752  	return nil
   753  }
   754  
   755  // SetProposalAndBlock inputs the proposal and all block parts.
   756  func (cs *State) SetProposalAndBlock(
   757  	proposal *types.Proposal,
   758  	block *types.Block,
   759  	parts *types.PartSet,
   760  	peerID p2p.ID,
   761  ) error {
   762  	if err := cs.SetProposal(proposal, peerID); err != nil {
   763  		return err
   764  	}
   765  	for i := 0; i < int(parts.Total()); i++ {
   766  		part := parts.GetPart(i)
   767  		if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil {
   768  			return err
   769  		}
   770  	}
   771  	return nil
   772  }
   773  
   774  //------------------------------------------------------------
   775  // internal functions for managing the state
   776  
// updateHeight records height in the Height metric and sets it as the
// current consensus height.
func (cs *State) updateHeight(height int64) {
	cs.metrics.Height.Set(float64(height))
	cs.Height = height
}
   781  
   782  func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
   783  	cs.Round = round
   784  	cs.Step = step
   785  }
   786  
   787  // enterNewRound(height, 0) at cs.StartTime.
   788  func (cs *State) scheduleRound0(rs *cstypes.RoundState) {
   789  	// cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime)
   790  	sleepDuration := rs.StartTime.Sub(tmtime.Now())
   791  	cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight)
   792  }
   793  
   794  // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan)
   795  func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) {
   796  	cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step})
   797  }
   798  
// sendInternalMessage sends a msg into the receiveRoutine regarding our own
// proposal, block part, or vote.
// The send never blocks the caller: if internalMsgQueue is full, the message
// is handed to a new goroutine that performs the blocking send instead.
func (cs *State) sendInternalMessage(mi msgInfo) {
	select {
	case cs.internalMsgQueue <- mi:
	default:
		// NOTE: using the go-routine means our votes can
		// be processed out of order.
		// TODO: use CList here for strict determinism and
		// attempt push to internalMsgQueue in receiveRoutine
		cs.Logger.Info("Internal msg queue is full. Using a go-routine")
		go func() { cs.internalMsgQueue <- mi }()
	}
}
   812  
   813  // Reconstruct LastCommit from SeenCommit, which we saved along with the block,
   814  // (which happens even before saving the state)
   815  func (cs *State) reconstructLastCommit(state sm.State) {
   816  	seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight)
   817  	if seenCommit == nil {
   818  		panic(fmt.Sprintf("Failed to reconstruct LastCommit: seen commit for height %v not found",
   819  			state.LastBlockHeight))
   820  	}
   821  
   822  	lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators)
   823  	if !lastPrecommits.HasTwoThirdsMajority() {
   824  		panic("Failed to reconstruct LastCommit: Does not have +2/3 maj")
   825  	}
   826  
   827  	cs.LastCommit = lastPrecommits
   828  }
   829  
// updateToState updates State and increments height to match that of state.
// The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight.
//
// If cs.state is already populated and state is not further along than it,
// nothing is reset and only newStep() is fired. Otherwise LastCommit is
// derived, the per-height round-state fields are cleared, cs.state is replaced
// by state, and the resulting round state is broadcast through newStep().
//
// It panics when the heights are inconsistent, when the precommits of the
// commit round lack a +2/3 majority, or when LastCommit is missing after the
// initial block.
func (cs *State) updateToState(state sm.State) {
	// A commit round is set, so the incoming state is expected to be the one
	// produced by the block at the current height.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf("updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight))
	}
	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Info(
				"Ignoring updateToState()",
				"newHeight",
				state.LastBlockHeight+1,
				"oldHeight",
				cs.state.LastBlockHeight+1)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	// Work out LastCommit for the new height. If none of the cases match, the
	// existing cs.LastCommit is left untouched.
	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf("Wanted to form a Commit, but Precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight,
				cs.CommitRound,
				cs.Votes.Precommits(cs.CommitRound)))
		}
		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)
	case cs.LastCommit == nil:
		// NOTE: when Tendermint starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf("LastCommit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height
	height := state.LastBlockHeight + 1
	if height == 1 {
		// No block has been committed yet: start at the configured initial height.
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)
	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// An alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(tmtime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	// Clear everything that belonged to the previous height.
	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}
   928  
   929  func (cs *State) newStep() {
   930  	rs := cs.RoundStateEvent()
   931  	if err := cs.wal.Write(rs); err != nil {
   932  		cs.Logger.Error("Error writing to wal", "err", err)
   933  	}
   934  	cs.nSteps++
   935  	// newStep is called by updateToState in NewState before the eventBus is set!
   936  	if cs.eventBus != nil {
   937  		if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
   938  			cs.Logger.Error("Error publishing new round step", "err", err)
   939  		}
   940  		cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState)
   941  	}
   942  }
   943  
   944  //-----------------------------------------
   945  // the main go routines
   946  
// receiveRoutine handles messages which may cause state transitions.
// Its argument (maxSteps) is the number of steps to process before exiting - use 0 to run forever.
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
//
// The loop ends when cs.Quit() fires, when maxSteps is reached, or when a
// panic is recovered. On quit and on a recovered panic the WAL is stopped and
// cs.done is closed; when maxSteps is reached the routine just returns.
func (cs *State) receiveRoutine(maxSteps int) {
	// onExit stops the WAL, waits for it, and then signals completion by
	// closing cs.done.
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but its ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("error trying to stop wal", "error", err)
		}
		cs.wal.Wait()

		close(cs.done)
	}

	// Recover from any panic raised while handling an input, log it together
	// with the stack trace, and shut down through onExit.
	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		// A positive maxSteps bounds the routine by cs.nSteps; the counter is
		// reset before returning.
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Info("reached max steps. exiting receive routine")
				cs.nSteps = 0
				return
			}
		}
		// Copy of the round state taken before waiting for input; handleTimeout
		// checks incoming timeouts against this copy.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()
		case mi = <-cs.peerMsgQueue:
			// Peer messages are written to the WAL on a best-effort basis: a
			// failed write is logged and the message is still handled.
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}
			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)
		case mi = <-cs.internalMsgQueue:
			// Our own messages must reach the WAL before they are handled; a
			// failed write is fatal.
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err))
			}

			if _, ok := mi.Msg.(*tmcon.VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)
		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}
			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)
		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}
  1032  
  1033  func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
  1034  	cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)
  1035  
  1036  	// timeouts must be for current height, round, step
  1037  	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
  1038  		cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
  1039  		return
  1040  	}
  1041  
  1042  	// the timeout will now cause a state transition
  1043  	cs.mtx.Lock()
  1044  	defer cs.mtx.Unlock()
  1045  
  1046  	switch ti.Step {
  1047  	case cstypes.RoundStepNewHeight:
  1048  		// NewRound event fired from enterNewRound.
  1049  		// XXX: should we fire timeout here (for timeout commit)?
  1050  		cs.enterNewRound(ti.Height, 0)
  1051  	case cstypes.RoundStepNewRound:
  1052  		cs.enterPropose(ti.Height, 0)
  1053  	case cstypes.RoundStepPropose:
  1054  		if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
  1055  			cs.Logger.Error("Error publishing timeout propose", "err", err)
  1056  		}
  1057  		cs.enterPrevote(ti.Height, ti.Round)
  1058  	case cstypes.RoundStepPrevoteWait:
  1059  		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
  1060  			cs.Logger.Error("Error publishing timeout wait", "err", err)
  1061  		}
  1062  		cs.enterPrecommit(ti.Height, ti.Round)
  1063  	case cstypes.RoundStepPrecommitWait:
  1064  		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
  1065  			cs.Logger.Error("Error publishing timeout wait", "err", err)
  1066  		}
  1067  		cs.enterPrecommit(ti.Height, ti.Round)
  1068  		cs.enterNewRound(ti.Height, ti.Round+1)
  1069  	default:
  1070  		panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step))
  1071  	}
  1072  }
  1073  
  1074  func (cs *State) handleTxsAvailable() {
  1075  	cs.mtx.Lock()
  1076  	defer cs.mtx.Unlock()
  1077  
  1078  	// We only need to do this for round 0.
  1079  	if cs.Round != 0 {
  1080  		return
  1081  	}
  1082  
  1083  	switch cs.Step {
  1084  	case cstypes.RoundStepNewHeight: // timeoutCommit phase
  1085  		if cs.needProofBlock(cs.Height) {
  1086  			// enterPropose will be called by enterNewRound
  1087  			return
  1088  		}
  1089  
  1090  		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
  1091  		timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
  1092  		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)
  1093  	case cstypes.RoundStepNewRound: // after timeoutCommit
  1094  		cs.enterPropose(cs.Height, 0)
  1095  	}
  1096  }
  1097  
  1098  //-----------------------------------------------------------------------------
  1099  // State functions
  1100  // Used internally by handleTimeout and handleMsg to make state transitions
  1101  
  1102  // Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit),
  1103  //
  1104  //	or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1)
  1105  //
  1106  // Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1)
  1107  // Enter: +2/3 precommits for nil at (height,round-1)
  1108  // Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round)
  1109  // NOTE: cs.StartTime was already set for height.
  1110  func (cs *State) enterNewRound(height int64, round int32) {
  1111  	logger := cs.Logger.With("height", height, "round", round)
  1112  
  1113  	if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) {
  1114  		logger.Debug("enter new round", "msg", log.NewLazySprintf(
  1115  			"enterNewRound(%v/%v): Invalid args. Current step: %v/%v/%v",
  1116  			height,
  1117  			round,
  1118  			cs.Height,
  1119  			cs.Round,
  1120  			cs.Step))
  1121  		return
  1122  	}
  1123  
  1124  	if now := tmtime.Now(); cs.StartTime.After(now) {
  1125  		logger.Debug("need to set a buffer and log message here for sanity", "startTime", cs.StartTime, "now", now)
  1126  	}
  1127  
  1128  	logger.Info("enter new round",
  1129  		"msg",
  1130  		log.NewLazySprintf("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  1131  
  1132  	// Increment validators if necessary
  1133  	validators := cs.Validators
  1134  	if cs.Round < round {
  1135  		validators = validators.Copy()
  1136  		validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round))
  1137  	}
  1138  
  1139  	// Setup new round
  1140  	// we don't fire newStep for this step,
  1141  	// but we fire an event, so update the round step first
  1142  	cs.updateRoundStep(round, cstypes.RoundStepNewRound)
  1143  	cs.Validators = validators
  1144  	if round == 0 {
  1145  		// We've already reset these upon new height,
  1146  		// and meanwhile we might have received a proposal
  1147  		// for round 0.
  1148  	} else {
  1149  		logger.Info("Resetting Proposal info")
  1150  		cs.Proposal = nil
  1151  		cs.ProposalBlock = nil
  1152  		cs.ProposalBlockParts = nil
  1153  	}
  1154  	cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping
  1155  	cs.TriggeredTimeoutPrecommit = false
  1156  
  1157  	if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
  1158  		cs.Logger.Error("Error publishing new round", "err", err)
  1159  	}
  1160  	cs.metrics.Rounds.Set(float64(round))
  1161  
  1162  	// Wait for txs to be available in the mempool
  1163  	// before we enterPropose in round 0. If the last block changed the app hash,
  1164  	// we may need an empty "proof" block, and enterPropose immediately.
  1165  	waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height)
  1166  	if waitForTxs {
  1167  		if cs.config.CreateEmptyBlocksInterval > 0 {
  1168  			cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round,
  1169  				cstypes.RoundStepNewRound)
  1170  		}
  1171  	} else {
  1172  		cs.enterPropose(height, round)
  1173  	}
  1174  }
  1175  
  1176  // needProofBlock returns true on the first height (so the genesis app hash is signed right away)
  1177  // and where the last block (height-1) caused the app hash to change
  1178  func (cs *State) needProofBlock(height int64) bool {
  1179  	if height == cs.state.InitialHeight {
  1180  		return true
  1181  	}
  1182  
  1183  	lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
  1184  	if lastBlockMeta == nil {
  1185  		panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1))
  1186  	}
  1187  	return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash)
  1188  }
  1189  
  1190  func (cs *State) isProposer(address []byte) bool {
  1191  	return bytes.Equal(cs.Validators.GetProposer().Address, address)
  1192  }
  1193  
  1194  func (cs *State) defaultDecideProposal(height int64, round int32) {
  1195  	var block *types.Block
  1196  	var blockParts *types.PartSet
  1197  
  1198  	// Decide on block
  1199  	if cs.ValidBlock != nil {
  1200  		// If there is valid block, choose that.
  1201  		block, blockParts = cs.ValidBlock, cs.ValidBlockParts
  1202  	} else {
  1203  		// Create a new proposal block from state/txs from the mempool.
  1204  		block, blockParts = cs.createProposalBlock()
  1205  		if block == nil {
  1206  			return
  1207  		}
  1208  	}
  1209  
  1210  	// Flush the WAL. Otherwise, we may not recompute the same proposal to sign,
  1211  	// and the privValidator will refuse to sign anything.
  1212  	if err := cs.wal.FlushAndSync(); err != nil {
  1213  		cs.Logger.Error("Error flushing to disk")
  1214  	}
  1215  
  1216  	// Make proposal
  1217  	propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()}
  1218  	proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID)
  1219  	p := proposal.ToProto()
  1220  	if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil {
  1221  		proposal.Signature = p.Signature
  1222  
  1223  		// send proposal and block parts on internal msg queue
  1224  		cs.sendInternalMessage(msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""})
  1225  		for i := 0; i < int(blockParts.Total()); i++ {
  1226  			part := blockParts.GetPart(i)
  1227  			cs.sendInternalMessage(msgInfo{&tmcon.BlockPartMessage{Height: cs.Height, Round: cs.Round, Part: part}, ""})
  1228  		}
  1229  		cs.Logger.Info("Signed proposal", "height", height, "round", round, "proposal", proposal)
  1230  		cs.Logger.Debug("default decide proposal",
  1231  			"msg",
  1232  			log.NewLazySprintf("Signed proposal block: %v", block))
  1233  	} else if !cs.replayMode {
  1234  		cs.Logger.Error("enterPropose: Error signing proposal", "height", height, "round", round, "err", err)
  1235  	}
  1236  }
  1237  
  1238  // Returns true if the proposal block is complete &&
  1239  // (if POLRound was proposed, we have +2/3 prevotes from there).
  1240  func (cs *State) isProposalComplete() bool {
  1241  	if cs.Proposal == nil || cs.ProposalBlock == nil {
  1242  		return false
  1243  	}
  1244  	// we have the proposal. if there's a POLRound,
  1245  	// make sure we have the prevotes from it too
  1246  	if cs.Proposal.POLRound < 0 {
  1247  		return true
  1248  	}
  1249  	// if this is false the proposer is lying or we haven't received the POL yet
  1250  	return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority()
  1251  }
  1252  
  1253  // Create the next block to propose and return it. Returns nil block upon error.
  1254  //
  1255  // We really only need to return the parts, but the block is returned for
  1256  // convenience so we can log the proposal block.
  1257  //
  1258  // NOTE: keep it side-effect free for clarity.
  1259  // CONTRACT: cs.privValidator is not nil.
  1260  func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) {
  1261  	if cs.privValidator == nil {
  1262  		panic("entered createProposalBlock with privValidator being nil")
  1263  	}
  1264  
  1265  	var commit *types.Commit
  1266  	switch {
  1267  	case cs.Height == cs.state.InitialHeight:
  1268  		// We're creating a proposal for the first block.
  1269  		// The commit is empty, but not nil.
  1270  		commit = types.NewCommit(0, 0, types.BlockID{}, nil)
  1271  	case cs.LastCommit.HasTwoThirdsMajority():
  1272  		// Make the commit from LastCommit
  1273  		commit = cs.LastCommit.MakeCommit()
  1274  	default: // This shouldn't happen.
  1275  		cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block")
  1276  		return
  1277  	}
  1278  
  1279  	if cs.privValidatorPubKey == nil {
  1280  		// If this node is a validator & proposer in the current round, it will
  1281  		// miss the opportunity to create a block.
  1282  		cs.Logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet))
  1283  		return
  1284  	}
  1285  	proposerAddr := cs.privValidatorPubKey.Address()
  1286  
  1287  	return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr)
  1288  }
  1289  
  1290  // Enter: any +2/3 prevotes at next round.
  1291  func (cs *State) enterPrevoteWait(height int64, round int32) {
  1292  	logger := cs.Logger.With("height", height, "round", round)
  1293  
  1294  	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) {
  1295  		logger.Debug("enter prevote wait",
  1296  			"msg",
  1297  			log.NewLazySprintf(
  1298  				"enterPrevoteWait(%v/%v): Invalid args. Current step: %v/%v/%v",
  1299  				height,
  1300  				round,
  1301  				cs.Height,
  1302  				cs.Round,
  1303  				cs.Step))
  1304  		return
  1305  	}
  1306  	if !cs.Votes.Prevotes(round).HasTwoThirdsAny() {
  1307  		panic(fmt.Sprintf("enterPrevoteWait(%v/%v), but Prevotes does not have any +2/3 votes", height, round))
  1308  	}
  1309  
  1310  	logger.Debug("enter prevote wait",
  1311  		"msg",
  1312  		log.NewLazySprintf("enterPrevoteWait(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  1313  
  1314  	defer func() {
  1315  		// Done enterPrevoteWait:
  1316  		cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait)
  1317  		cs.newStep()
  1318  	}()
  1319  
  1320  	// Wait for some more prevotes; enterPrecommit
  1321  	cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait)
  1322  }
  1323  
  1324  // Enter: any +2/3 precommits for next round.
  1325  func (cs *State) enterPrecommitWait(height int64, round int32) {
  1326  	logger := cs.Logger.With("height", height, "round", round)
  1327  
  1328  	if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) {
  1329  		logger.Debug("state enter precommit wait",
  1330  			"msg",
  1331  			log.NewLazySprintf(
  1332  				"enterPrecommitWait(%v/%v): Invalid args. "+
  1333  					"Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v",
  1334  				height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit))
  1335  		return
  1336  	}
  1337  	if !cs.Votes.Precommits(round).HasTwoThirdsAny() {
  1338  		panic(fmt.Sprintf("enterPrecommitWait(%v/%v), but Precommits does not have any +2/3 votes", height, round))
  1339  	}
  1340  	logger.Info("enter precommit wait",
  1341  		"msg",
  1342  		log.NewLazySprintf("enterPrecommitWait(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))
  1343  
  1344  	defer func() {
  1345  		// Done enterPrecommitWait:
  1346  		cs.TriggeredTimeoutPrecommit = true
  1347  		cs.newStep()
  1348  	}()
  1349  
  1350  	// Wait for some more precommits; enterNewRound
  1351  	cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait)
  1352  }
  1353  
// enterCommit moves the state machine into RoundStepCommit for height, using
// the precommits of commitRound as the commit.
//
// Enter: +2/3 precommits for block
//
// The call is ignored when height is not the current height or when the step
// is already RoundStepCommit or later. It panics if the precommits of
// commitRound have no +2/3 majority.
func (cs *State) enterCommit(height int64, commitRound int32) {
	logger := cs.Logger.With("height", height, "commitRound", commitRound)

	if cs.Height != height || cstypes.RoundStepCommit <= cs.Step {
		logger.Debug("enter commit",
			"msg",
			log.NewLazySprintf("enterCommit(%v/%v): Invalid args. Current step: %v/%v/%v",
				height,
				commitRound,
				cs.Height,
				cs.Round,
				cs.Step))
		return
	}
	logger.Info("enter commit",
		"msg",
		log.NewLazySprintf("enterCommit(%v/%v). Current: %v/%v/%v", height, commitRound, cs.Height, cs.Round, cs.Step))

	// Runs when enterCommit returns, i.e. after the proposal block fields
	// below have been set up (and also while unwinding from the panic below).
	defer func() {
		// Done enterCommit:
		// keep cs.Round the same, commitRound points to the right Precommits set.
		cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit)
		cs.CommitRound = commitRound
		cs.CommitTime = tmtime.Now()
		cs.newStep()

		// Maybe finalize immediately.
		cs.tryFinalizeCommit(height)
	}()

	blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority()
	if !ok {
		panic("RunActionCommit() expects +2/3 precommits")
	}

	// The Locked* fields no longer matter.
	// Move them over to ProposalBlock if they match the commit hash,
	// otherwise they'll be cleared in updateToState.
	// NOTE(review): LockedBlock may be nil here; HashesTo is presumably nil-safe - confirm.
	if cs.LockedBlock.HashesTo(blockID.Hash) {
		logger.Info("Commit is for locked block. Set ProposalBlock=LockedBlock", "blockHash", blockID.Hash)
		cs.ProposalBlock = cs.LockedBlock
		cs.ProposalBlockParts = cs.LockedBlockParts
	}

	// If we don't have the block being committed, set up to get it.
	if !cs.ProposalBlock.HashesTo(blockID.Hash) {
		if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) {
			logger.Info(
				"commit is for a block we do not know about; set ProposalBlock=nil",
				"proposal", log.NewLazyBlockHash(cs.ProposalBlock),
				"commit", blockID.Hash,
			)

			// We're getting the wrong block.
			// Set up ProposalBlockParts and keep waiting.
			cs.ProposalBlock = nil
			cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader)
			if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil {
				cs.Logger.Error("Error publishing valid block", "err", err)
			}
			cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState)
		}
		// else {
		// The part set already has the commit's header.
		// We just need to keep waiting.
		// }
	}
}
  1422  
  1423  // If we have the block AND +2/3 commits for it, finalize.
  1424  func (cs *State) tryFinalizeCommit(height int64) {
  1425  	logger := cs.Logger.With("height", height)
  1426  
  1427  	if cs.Height != height {
  1428  		panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height))
  1429  	}
  1430  
  1431  	blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
  1432  	if !ok || len(blockID.Hash) == 0 {
  1433  		logger.Error("Attempt to finalize failed. There was no +2/3 majority, or +2/3 was for <nil>.")
  1434  		return
  1435  	}
  1436  	if !cs.ProposalBlock.HashesTo(blockID.Hash) {
  1437  		// TODO: this happens every time if we're not a validator (ugly logs)
  1438  		// TODO: ^^ wait, why does it matter that we're a validator?
  1439  		logger.Debug(
  1440  			"attempt to finalize failed; we do not have the commit block",
  1441  			"proposal-block", log.NewLazyBlockHash(cs.ProposalBlock),
  1442  			"commit-block", blockID.Hash,
  1443  		)
  1444  		return
  1445  	}
  1446  
  1447  	//	go
  1448  	cs.finalizeCommit(height)
  1449  }
  1450  
// finalizeCommit commits the proposal block for height: it saves the block,
// writes the end-of-height marker to the WAL, applies the block, and then
// increments the height and goes to cstypes.RoundStepNewHeight.
//
// The call is ignored unless height is the current height and the step is
// RoundStepCommit. It panics when the commit round has no +2/3 majority, when
// the proposal block or its parts do not match the commit, when the block
// fails validation, or when the end-of-height marker cannot be written to the
// WAL. If ApplyBlock fails the error is logged and the function returns
// without advancing the height.
//
// The fail.Fail() calls mark points between the individual steps.
// NOTE(review): presumably crash-injection points for testing - confirm
// against the fail package.
func (cs *State) finalizeCommit(height int64) {
	if cs.Height != height || cs.Step != cstypes.RoundStepCommit {
		cs.Logger.Debug("finalize commit",
			"msg",
			log.NewLazySprintf("finalizeCommit(%v): Invalid args. Current step: %v/%v/%v",
				height,
				cs.Height,
				cs.Round,
				cs.Step))
		return
	}

	blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
	block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts

	// Sanity checks: the block we hold must be exactly the one that was committed.
	if !ok {
		panic("Cannot finalizeCommit, commit does not have two thirds majority")
	}
	if !blockParts.HasHeader(blockID.PartSetHeader) {
		panic("Expected ProposalBlockParts header to be commit header")
	}
	if !block.HashesTo(blockID.Hash) {
		panic("Cannot finalizeCommit, ProposalBlock does not hash to commit hash")
	}
	if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil {
		panic(fmt.Errorf("+2/3 committed an invalid block: %w", err))
	}

	cs.Logger.Info("finalizing commit of block with N txs",
		"height", block.Height,
		"hash", block.Hash(),
		"root", block.AppHash,
		"N", len(block.Txs),
	)
	cs.Logger.Debug("finalize commit", "msg", log.NewLazySprintf("%v", block))

	fail.Fail() // XXX

	// Save to blockStore.
	if cs.blockStore.Height() < block.Height {
		// NOTE: the seenCommit is local justification to commit this block,
		// but may differ from the LastCommit included in the next block
		precommits := cs.Votes.Precommits(cs.CommitRound)
		seenCommit := precommits.MakeCommit()
		cs.blockStore.SaveBlock(block, blockParts, seenCommit)
	} else {
		// Happens during replay if we already saved the block but didn't commit
		cs.Logger.Debug("calling finalizeCommit on already stored block", "height", block.Height)
	}

	fail.Fail() // XXX

	// Write EndHeightMessage{} for this height, implying that the blockstore
	// has saved the block.
	//
	// If we crash before writing this EndHeightMessage{}, we will recover by
	// running ApplyBlock during the ABCI handshake when we restart.  If we
	// didn't save the block to the blockstore before writing
	// EndHeightMessage{}, we'd have to change WAL replay -- currently it
	// complains about replaying for heights where an #ENDHEIGHT entry already
	// exists.
	//
	// Either way, the State should not be resumed until we
	// successfully call ApplyBlock (ie. later here, or in Handshake after
	// restart).
	endMsg := tmcon.EndHeightMessage{Height: height}
	if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync
		panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node",
			endMsg, err))
	}

	fail.Fail() // XXX

	// Create a copy of the state for staging and an event cache for txs.
	stateCopy := cs.state.Copy()

	// Execute and commit the block, update and save the state, and update the mempool.
	// NOTE The block.AppHash wont reflect these txs until the next block.
	var err error
	var retainHeight int64
	stateCopy, retainHeight, err = cs.blockExec.ApplyBlock(
		stateCopy,
		types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()},
		block)
	if err != nil {
		// The height is not advanced; the state stays at RoundStepCommit.
		cs.Logger.Error("Error on ApplyBlock", "err", err)
		return
	}

	fail.Fail() // XXX

	// Prune old heights, if requested by ABCI app.
	// A pruning failure is only logged; it does not stop the commit.
	if retainHeight > 0 {
		pruned, err := cs.pruneBlocks(retainHeight)
		if err != nil {
			cs.Logger.Error("Failed to prune blocks", "retainHeight", retainHeight, "err", err)
		} else {
			cs.Logger.Info("Pruned blocks", "pruned", pruned, "retainHeight", retainHeight)
		}
	}

	// must be called before we update state
	cs.recordMetrics(height, block)

	// NewHeightStep!
	cs.updateToState(stateCopy)

	fail.Fail() // XXX

	// Private validator might have changed its key pair => refetch pubkey.
	if err := cs.updatePrivValidatorPubKey(); err != nil {
		cs.Logger.Error("Can't get private validator pubkey", "err", err)
	}

	// cs.StartTime is already set.
	// Schedule Round0 to start soon.
	cs.scheduleRound0(&cs.RoundState)

	// By here,
	// * cs.Height has been incremented to height+1
	// * cs.Step is now cstypes.RoundStepNewHeight
	// * cs.StartTime is set to when we will start round0.
}
  1575  
  1576  func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) {
  1577  	base := cs.blockStore.Base()
  1578  	if retainHeight <= base {
  1579  		return 0, nil
  1580  	}
  1581  	pruned, err := cs.blockStore.PruneBlocks(retainHeight)
  1582  	if err != nil {
  1583  		return 0, fmt.Errorf("failed to prune block store: %w", err)
  1584  	}
  1585  	err = cs.blockExec.Store().PruneStates(base, retainHeight)
  1586  	if err != nil {
  1587  		return 0, fmt.Errorf("failed to prune state database: %w", err)
  1588  	}
  1589  	return pruned, nil
  1590  }
  1591  
  1592  func (cs *State) recordMetrics(height int64, block *types.Block) {
  1593  	cs.metrics.Validators.Set(float64(cs.Validators.Size()))
  1594  	cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower()))
  1595  
  1596  	var (
  1597  		missingValidators      int
  1598  		missingValidatorsPower int64
  1599  	)
  1600  	// height=0 -> MissingValidators and MissingValidatorsPower are both 0.
  1601  	// Remember that the first LastCommit is intentionally empty, so it's not
  1602  	// fair to increment missing validators number.
  1603  	if height > cs.state.InitialHeight {
  1604  		// Sanity check that commit size matches validator set size - only applies
  1605  		// after first block.
  1606  		var (
  1607  			commitSize = block.LastCommit.Size()
  1608  			valSetLen  = len(cs.LastValidators.Validators)
  1609  			address    types.Address
  1610  		)
  1611  		if commitSize != valSetLen {
  1612  			panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v",
  1613  				commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators))
  1614  		}
  1615  
  1616  		if cs.privValidator != nil {
  1617  			if cs.privValidatorPubKey == nil {
  1618  				// Metrics won't be updated, but it's not critical.
  1619  				cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet))
  1620  			} else {
  1621  				address = cs.privValidatorPubKey.Address()
  1622  			}
  1623  		}
  1624  
  1625  		for i, val := range cs.LastValidators.Validators {
  1626  			commitSig := block.LastCommit.Signatures[i]
  1627  			if commitSig.Absent() {
  1628  				missingValidators++
  1629  				missingValidatorsPower += val.VotingPower
  1630  			}
  1631  
  1632  			if bytes.Equal(val.Address, address) {
  1633  				label := []string{
  1634  					"validator_address", val.Address.String(),
  1635  				}
  1636  				cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower))
  1637  				if commitSig.ForBlock() {
  1638  					cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height))
  1639  				} else {
  1640  					cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1))
  1641  				}
  1642  			}
  1643  
  1644  		}
  1645  	}
  1646  	cs.metrics.MissingValidators.Set(float64(missingValidators))
  1647  	cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower))
  1648  
  1649  	// NOTE: byzantine validators power and count is only for consensus evidence i.e. duplicate vote
  1650  	var (
  1651  		byzantineValidatorsPower = int64(0)
  1652  		byzantineValidatorsCount = int64(0)
  1653  	)
  1654  	for _, ev := range block.Evidence.Evidence {
  1655  		if dve, ok := ev.(*types.DuplicateVoteEvidence); ok {
  1656  			if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil {
  1657  				byzantineValidatorsCount++
  1658  				byzantineValidatorsPower += val.VotingPower
  1659  			}
  1660  		}
  1661  	}
  1662  	cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount))
  1663  	cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower))
  1664  
  1665  	if height > 1 {
  1666  		lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
  1667  		if lastBlockMeta != nil {
  1668  			cs.metrics.BlockIntervalSeconds.Observe(
  1669  				block.Time.Sub(lastBlockMeta.Header.Time).Seconds(),
  1670  			)
  1671  		}
  1672  	}
  1673  
  1674  	cs.metrics.NumTxs.Set(float64(len(block.Data.Txs)))
  1675  	cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs)))
  1676  	cs.metrics.BlockSizeBytes.Set(float64(block.Size()))
  1677  	cs.metrics.CommittedHeight.Set(float64(block.Height))
  1678  }
  1679  
  1680  //-----------------------------------------------------------------------------
  1681  
  1682  // NOTE: block is not necessarily valid.
  1683  // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit,
  1684  // once we have the full block.
  1685  func (cs *State) addProposalBlockPart(msg *tmcon.BlockPartMessage, peerID p2p.ID) (added bool, err error) {
  1686  	height, round, part := msg.Height, msg.Round, msg.Part
  1687  
  1688  	// Blocks might be reused, so round mismatch is OK
  1689  	if cs.Height != height {
  1690  		cs.Logger.Debug("Received block part from wrong height", "height", height, "round", round)
  1691  		return false, nil
  1692  	}
  1693  
  1694  	// We're not expecting a block part.
  1695  	if cs.ProposalBlockParts == nil {
  1696  		// NOTE: this can happen when we've gone to a higher round and
  1697  		// then receive parts from the previous round - not necessarily a bad peer.
  1698  		cs.Logger.Info("Received a block part when we're not expecting any",
  1699  			"height", height, "round", round, "index", part.Index, "peer", peerID)
  1700  		return false, nil
  1701  	}
  1702  
  1703  	added, err = cs.ProposalBlockParts.AddPart(part)
  1704  	if err != nil {
  1705  		return added, err
  1706  	}
  1707  	if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
  1708  		return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
  1709  			cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,
  1710  		)
  1711  	}
  1712  	if added && cs.ProposalBlockParts.IsComplete() {
  1713  		bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader())
  1714  		if err != nil {
  1715  			return added, err
  1716  		}
  1717  
  1718  		pbb := new(tmproto.Block)
  1719  		err = proto.Unmarshal(bz, pbb)
  1720  		if err != nil {
  1721  			return added, err
  1722  		}
  1723  
  1724  		block, err := types.BlockFromProto(pbb)
  1725  		if err != nil {
  1726  			return added, err
  1727  		}
  1728  
  1729  		cs.ProposalBlock = block
  1730  		// NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal
  1731  		cs.Logger.Info("Received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash())
  1732  		if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil {
  1733  			cs.Logger.Error("Error publishing event complete proposal", "err", err)
  1734  		}
  1735  
  1736  		// Update Valid* if we can.
  1737  		prevotes := cs.Votes.Prevotes(cs.Round)
  1738  		blockID, hasTwoThirds := prevotes.TwoThirdsMajority()
  1739  		if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) {
  1740  			if cs.ProposalBlock.HashesTo(blockID.Hash) {
  1741  				cs.Logger.Info("Updating valid block to new proposal block",
  1742  					"valid-round", cs.Round, "valid-block-hash", cs.ProposalBlock.Hash())
  1743  				cs.ValidRound = cs.Round
  1744  				cs.ValidBlock = cs.ProposalBlock
  1745  				cs.ValidBlockParts = cs.ProposalBlockParts
  1746  			}
  1747  			// TODO: In case there is +2/3 majority in Prevotes set for some
  1748  			// block and cs.ProposalBlock contains different block, either
  1749  			// proposer is faulty or voting power of faulty processes is more
  1750  			// than 1/3. We should trigger in the future accountability
  1751  			// procedure at this point.
  1752  		}
  1753  
  1754  		if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() {
  1755  			// Move onto the next step
  1756  			cs.enterPrevote(height, cs.Round)
  1757  			if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added
  1758  				cs.enterPrecommit(height, cs.Round)
  1759  			}
  1760  		} else if cs.Step == cstypes.RoundStepCommit {
  1761  			// If we're waiting on the proposal block...
  1762  			cs.tryFinalizeCommit(height)
  1763  		}
  1764  		return added, nil
  1765  	}
  1766  	return added, nil
  1767  }
  1768  
  1769  // Attempt to add the vote. if its a duplicate signature, dupeout the validator
  1770  func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) {
  1771  	added, err := cs.addVote(vote, peerID)
  1772  	if err != nil {
  1773  		// If the vote height is off, we'll just ignore it,
  1774  		// But if it's a conflicting sig, add it to the cs.evpool.
  1775  		// If it's otherwise invalid, punish peer.
  1776  		//nolint: gocritic
  1777  		if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok {
  1778  			if cs.privValidatorPubKey == nil {
  1779  				return false, errPubKeyIsNotSet
  1780  			}
  1781  
  1782  			if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) {
  1783  				cs.Logger.Error(
  1784  					"Found conflicting vote from ourselves. Did you unsafe_reset a validator?",
  1785  					"height",
  1786  					vote.Height,
  1787  					"round",
  1788  					vote.Round,
  1789  					"type",
  1790  					vote.Type)
  1791  				return added, err
  1792  			}
  1793  			cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB)
  1794  			return added, err
  1795  		} else if err == types.ErrVoteNonDeterministicSignature {
  1796  			cs.Logger.Debug("Vote has non-deterministic signature", "err", err)
  1797  		} else {
  1798  			// Either
  1799  			// 1) bad peer OR
  1800  			// 2) not a bad peer? this can also err sometimes with "Unexpected step" OR
  1801  			// 3) tmkms use with multiple validators connecting to a single tmkms instance
  1802  			// 		(https://github.com/vipernet-xyz/tm/issues/3839).
  1803  			cs.Logger.Info("Error attempting to add vote", "err", err)
  1804  			return added, ErrAddingVote
  1805  		}
  1806  	}
  1807  	return added, nil
  1808  }
  1809  
  1810  //-----------------------------------------------------------------------------
  1811  
  1812  // CONTRACT: cs.privValidator is not nil.
  1813  func (cs *State) signVote(
  1814  	msgType tmproto.SignedMsgType,
  1815  	hash []byte,
  1816  	header types.PartSetHeader,
  1817  ) (*types.Vote, error) {
  1818  	// Flush the WAL. Otherwise, we may not recompute the same vote to sign,
  1819  	// and the privValidator will refuse to sign anything.
  1820  	if err := cs.wal.FlushAndSync(); err != nil {
  1821  		return nil, err
  1822  	}
  1823  
  1824  	if cs.privValidatorPubKey == nil {
  1825  		return nil, errPubKeyIsNotSet
  1826  	}
  1827  	addr := cs.privValidatorPubKey.Address()
  1828  	valIdx, _ := cs.Validators.GetByAddress(addr)
  1829  
  1830  	vote := &types.Vote{
  1831  		ValidatorAddress: addr,
  1832  		ValidatorIndex:   valIdx,
  1833  		Height:           cs.Height,
  1834  		Round:            cs.Round,
  1835  		Timestamp:        cs.voteTime(),
  1836  		Type:             msgType,
  1837  		BlockID:          types.BlockID{Hash: hash, PartSetHeader: header},
  1838  	}
  1839  	v := vote.ToProto()
  1840  	err := cs.privValidator.SignVote(cs.state.ChainID, v)
  1841  	vote.Signature = v.Signature
  1842  
  1843  	return vote, err
  1844  }
  1845  
  1846  func (cs *State) voteTime() time.Time {
  1847  	now := tmtime.Now()
  1848  	minVoteTime := now
  1849  	// TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil,
  1850  	// even if cs.LockedBlock != nil. See https://github.com/vipernet-xyz/tm/tree/v0.34.x/spec/.
  1851  	timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond
  1852  	if cs.LockedBlock != nil {
  1853  		// See the BFT time spec
  1854  		// https://github.com/vipernet-xyz/tm/blob/v0.34.x/spec/consensus/bft-time.md
  1855  		minVoteTime = cs.LockedBlock.Time.Add(timeIota)
  1856  	} else if cs.ProposalBlock != nil {
  1857  		minVoteTime = cs.ProposalBlock.Time.Add(timeIota)
  1858  	}
  1859  
  1860  	if now.After(minVoteTime) {
  1861  		return now
  1862  	}
  1863  	return minVoteTime
  1864  }
  1865  
  1866  // sign the vote and publish on internalMsgQueue
  1867  func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote {
  1868  	if cs.privValidator == nil { // the node does not have a key
  1869  		return nil
  1870  	}
  1871  
  1872  	if cs.privValidatorPubKey == nil {
  1873  		// Vote won't be signed, but it's not critical.
  1874  		cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet))
  1875  		return nil
  1876  	}
  1877  
  1878  	// If the node not in the validator set, do nothing.
  1879  	if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) {
  1880  		return nil
  1881  	}
  1882  
  1883  	// TODO: pass pubKey to signVote
  1884  	vote, err := cs.signVote(msgType, hash, header)
  1885  	if err == nil {
  1886  		cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: vote}, ""})
  1887  		cs.Logger.Info("Signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote)
  1888  		return vote
  1889  	}
  1890  	// if !cs.replayMode {
  1891  	cs.Logger.Error("Error signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err)
  1892  	//}
  1893  	return nil
  1894  }
  1895  
  1896  // updatePrivValidatorPubKey get's the private validator public key and
  1897  // memoizes it. This func returns an error if the private validator is not
  1898  // responding or responds with an error.
  1899  func (cs *State) updatePrivValidatorPubKey() error {
  1900  	if cs.privValidator == nil {
  1901  		return nil
  1902  	}
  1903  
  1904  	pubKey, err := cs.privValidator.GetPubKey()
  1905  	if err != nil {
  1906  		return err
  1907  	}
  1908  	cs.privValidatorPubKey = pubKey
  1909  	return nil
  1910  }
  1911  
  1912  // look back to check existence of the node's consensus votes before joining consensus
  1913  func (cs *State) checkDoubleSigningRisk(height int64) error {
  1914  	if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 {
  1915  		valAddr := cs.privValidatorPubKey.Address()
  1916  		doubleSignCheckHeight := cs.config.DoubleSignCheckHeight
  1917  		if doubleSignCheckHeight > height {
  1918  			doubleSignCheckHeight = height
  1919  		}
  1920  		for i := int64(1); i < doubleSignCheckHeight; i++ {
  1921  			lastCommit := cs.blockStore.LoadSeenCommit(height - i)
  1922  			if lastCommit != nil {
  1923  				for sigIdx, s := range lastCommit.Signatures {
  1924  					if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) {
  1925  						cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i)
  1926  						return ErrSignatureFoundInPastBlocks
  1927  					}
  1928  				}
  1929  			}
  1930  		}
  1931  	}
  1932  	return nil
  1933  }
  1934  
  1935  //---------------------------------------------------------
  1936  
  1937  func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int {
  1938  	if h1 < h2 {
  1939  		return -1
  1940  	} else if h1 > h2 {
  1941  		return 1
  1942  	}
  1943  	if r1 < r2 {
  1944  		return -1
  1945  	} else if r1 > r2 {
  1946  		return 1
  1947  	}
  1948  	if s1 < s2 {
  1949  		return -1
  1950  	} else if s1 > s2 {
  1951  		return 1
  1952  	}
  1953  	return 0
  1954  }
  1955  
  1956  // repairWalFile decodes messages from src (until the decoder errors) and
  1957  // writes them to dst.
  1958  func repairWalFile(src, dst string) error {
  1959  	in, err := os.Open(src)
  1960  	if err != nil {
  1961  		return err
  1962  	}
  1963  	defer in.Close()
  1964  
  1965  	out, err := os.Open(dst)
  1966  	if err != nil {
  1967  		return err
  1968  	}
  1969  	defer out.Close()
  1970  
  1971  	var (
  1972  		dec = NewWALDecoder(in)
  1973  		enc = NewWALEncoder(out)
  1974  	)
  1975  
  1976  	// best-case repair (until first error is encountered)
  1977  	for {
  1978  		msg, err := dec.Decode()
  1979  		if err != nil {
  1980  			break
  1981  		}
  1982  
  1983  		err = enc.Encode(msg)
  1984  		if err != nil {
  1985  			return fmt.Errorf("failed to encode msg: %w", err)
  1986  		}
  1987  	}
  1988  
  1989  	return nil
  1990  }