// Source: github.com/badrootd/nibiru-cometbft@v0.37.5-0.20240307173500-2a75559eee9b/consensus/state.go

package consensus

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"runtime/debug"
	"sort"
	"time"

	"github.com/cosmos/gogoproto/proto"

	cfg "github.com/badrootd/nibiru-cometbft/config"
	cstypes "github.com/badrootd/nibiru-cometbft/consensus/types"
	"github.com/badrootd/nibiru-cometbft/crypto"
	cmtevents "github.com/badrootd/nibiru-cometbft/libs/events"
	"github.com/badrootd/nibiru-cometbft/libs/fail"
	cmtjson "github.com/badrootd/nibiru-cometbft/libs/json"
	"github.com/badrootd/nibiru-cometbft/libs/log"
	cmtmath "github.com/badrootd/nibiru-cometbft/libs/math"
	cmtos "github.com/badrootd/nibiru-cometbft/libs/os"
	"github.com/badrootd/nibiru-cometbft/libs/service"
	cmtsync "github.com/badrootd/nibiru-cometbft/libs/sync"
	"github.com/badrootd/nibiru-cometbft/p2p"
	cmtproto "github.com/badrootd/nibiru-cometbft/proto/tendermint/types"
	sm "github.com/badrootd/nibiru-cometbft/state"
	"github.com/badrootd/nibiru-cometbft/types"
	cmttime "github.com/badrootd/nibiru-cometbft/types/time"
)

// Consensus sentinel errors.
// Callers can compare against these values to classify a failure.
var (
	ErrInvalidProposalSignature   = errors.New("error invalid proposal signature")
	ErrInvalidProposalPOLRound    = errors.New("error invalid proposal POL round")
	ErrAddingVote                 = errors.New("error adding vote")
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)

// msgQueueSize is the buffer capacity used for the peer, internal and stats
// message channels that NewState creates.
var msgQueueSize = 1000

// msgInfo wraps a message from the reactor which may update the state.
// An empty PeerID marks the message as internally generated.
type msgInfo struct {
	Msg    Message `json:"msg"`
	PeerID p2p.ID  `json:"peer_key"`
}

// timeoutInfo describes an internally generated timeout which may update the
// state. Height, Round and Step identify the point in consensus the timeout
// was scheduled for.
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}

// String renders the timeout as "<duration> ; <height>/<round> <step>".
func (ti *timeoutInfo) String() string {
	return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
}

// txNotifier is the interface to the mempool: it exposes a channel that
// signals when transactions are available.
type txNotifier interface {
	TxsAvailable() <-chan struct{}
}

// evidencePool is the interface to the evidence pool.
type evidencePool interface {
	// ReportConflictingVotes reports conflicting votes to the evidence pool
	// to be processed into evidence.
	ReportConflictingVotes(voteA, voteB *types.Vote)
}

// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state; mtx guards the embedded RoundState and state
	mtx cmtsync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by the reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, e.g. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)
	doPrevote      func(height int64, round int32)
	setProposal    func(proposal *types.Proposal) error

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw cmtevents.EventSwitch

	// for reporting metrics
	metrics *Metrics
}

// StateOption sets an optional parameter on the State.
type StateOption func(*State)

// NewState returns a new State.
149 func NewState( 150 config *cfg.ConsensusConfig, 151 state sm.State, 152 blockExec *sm.BlockExecutor, 153 blockStore sm.BlockStore, 154 txNotifier txNotifier, 155 evpool evidencePool, 156 options ...StateOption, 157 ) *State { 158 cs := &State{ 159 config: config, 160 blockExec: blockExec, 161 blockStore: blockStore, 162 txNotifier: txNotifier, 163 peerMsgQueue: make(chan msgInfo, msgQueueSize), 164 internalMsgQueue: make(chan msgInfo, msgQueueSize), 165 timeoutTicker: NewTimeoutTicker(), 166 statsMsgQueue: make(chan msgInfo, msgQueueSize), 167 done: make(chan struct{}), 168 doWALCatchup: true, 169 wal: nilWAL{}, 170 evpool: evpool, 171 evsw: cmtevents.NewEventSwitch(), 172 metrics: NopMetrics(), 173 } 174 for _, option := range options { 175 option(cs) 176 } 177 // set function defaults (may be overwritten before calling Start) 178 cs.decideProposal = cs.defaultDecideProposal 179 cs.doPrevote = cs.defaultDoPrevote 180 cs.setProposal = cs.defaultSetProposal 181 182 // We have no votes, so reconstruct LastCommit from SeenCommit. 183 if state.LastBlockHeight > 0 { 184 // In case of out of band performed statesync, the state store 185 // will have a state but no extended commit (as no block has been downloaded). 186 // If the height at which the vote extensions are enabled is lower 187 // than the height at which we statesync, consensus will panic because 188 // it will try to reconstruct the extended commit here. 189 cs.reconstructLastCommit(state) 190 } 191 192 cs.updateToState(state) 193 194 // NOTE: we do not call scheduleRound0 yet, we do that upon Start() 195 196 cs.BaseService = *service.NewBaseService(nil, "State", cs) 197 198 return cs 199 } 200 201 // SetLogger implements Service. 202 func (cs *State) SetLogger(l log.Logger) { 203 cs.BaseService.Logger = l 204 cs.timeoutTicker.SetLogger(l) 205 } 206 207 // SetEventBus sets event bus. 
208 func (cs *State) SetEventBus(b *types.EventBus) { 209 cs.eventBus = b 210 cs.blockExec.SetEventBus(b) 211 } 212 213 // StateMetrics sets the metrics. 214 func StateMetrics(metrics *Metrics) StateOption { 215 return func(cs *State) { cs.metrics = metrics } 216 } 217 218 // String returns a string. 219 func (cs *State) String() string { 220 // better not to access shared variables 221 return "ConsensusState" 222 } 223 224 // GetState returns a copy of the chain state. 225 func (cs *State) GetState() sm.State { 226 cs.mtx.RLock() 227 defer cs.mtx.RUnlock() 228 return cs.state.Copy() 229 } 230 231 // GetLastHeight returns the last height committed. 232 // If there were no blocks, returns 0. 233 func (cs *State) GetLastHeight() int64 { 234 cs.mtx.RLock() 235 defer cs.mtx.RUnlock() 236 return cs.RoundState.Height - 1 237 } 238 239 // GetRoundState returns a shallow copy of the internal consensus state. 240 func (cs *State) GetRoundState() *cstypes.RoundState { 241 cs.mtx.RLock() 242 rs := cs.RoundState // copy 243 cs.mtx.RUnlock() 244 return &rs 245 } 246 247 // GetRoundStateJSON returns a json of RoundState. 248 func (cs *State) GetRoundStateJSON() ([]byte, error) { 249 cs.mtx.RLock() 250 defer cs.mtx.RUnlock() 251 return cmtjson.Marshal(cs.RoundState) 252 } 253 254 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 255 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 256 cs.mtx.RLock() 257 defer cs.mtx.RUnlock() 258 return cmtjson.Marshal(cs.RoundState.RoundStateSimple()) 259 } 260 261 // GetValidators returns a copy of the current validators. 262 func (cs *State) GetValidators() (int64, []*types.Validator) { 263 cs.mtx.RLock() 264 defer cs.mtx.RUnlock() 265 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 266 } 267 268 // SetPrivValidator sets the private validator account for signing votes. It 269 // immediately requests pubkey and caches it. 
func (cs *State) SetPrivValidator(priv types.PrivValidator) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	cs.privValidator = priv

	// A failure to obtain the pubkey is only logged; the validator stays set.
	if err := cs.updatePrivValidatorPubKey(); err != nil {
		cs.Logger.Error("failed to get private validator pubkey", "err", err)
	}
}

// SetTimeoutTicker sets the local timer. It may be useful to overwrite for
// testing.
func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) {
	cs.mtx.Lock()
	cs.timeoutTicker = timeoutTicker
	cs.mtx.Unlock()
}

// LoadCommit loads the commit for a given height.
// For the block store's current height the seen commit is returned;
// for any other height the block commit is returned.
func (cs *State) LoadCommit(height int64) *types.Commit {
	cs.mtx.RLock()
	defer cs.mtx.RUnlock()

	if height == cs.blockStore.Height() {
		return cs.blockStore.LoadSeenCommit(height)
	}

	return cs.blockStore.LoadBlockCommit(height)
}

// OnStart loads the latest state via the WAL, and starts the timeout and
// receive routines.
//
// The sequence is: open the WAL (unless one was injected), start the timeout
// ticker, replay the WAL (with a single repair attempt on corruption), start
// the event switch, run the double-signing check, launch receiveRoutine and
// schedule round 0. Any returned error aborts the start.
func (cs *State) OnStart() error {
	// We may set the WAL in testing before calling Start, so only OpenWAL if it's
	// still the nilWAL.
	if _, ok := cs.wal.(nilWAL); ok {
		if err := cs.loadWalFile(); err != nil {
			return err
		}
	}

	// we need the timeoutRoutine for replay so
	// we don't block on the tick chan.
	// NOTE: we will get a build up of garbage go routines
	// firing on the tockChan until the receiveRoutine is started
	// to deal with them (by that point, at most one will be valid)
	if err := cs.timeoutTicker.Start(); err != nil {
		return err
	}

	// We may have lost some votes if the process crashed; reload from the
	// consensus log to catch up.
	if cs.doWALCatchup {
		repairAttempted := false

	LOOP:
		for {
			err := cs.catchupReplay(cs.Height)
			switch {
			case err == nil:
				// replay succeeded
				break LOOP

			case !IsDataCorruptionError(err):
				// not a corruption error: log it and start anyway
				cs.Logger.Error("error on catchup replay; proceeding to start state anyway", "err", err)
				break LOOP

			case repairAttempted:
				// still corrupted after one repair: give up
				return err
			}

			cs.Logger.Error("the WAL file is corrupted; attempting repair", "err", err)

			// 1) prep work
			if err := cs.wal.Stop(); err != nil {
				return err
			}

			repairAttempted = true

			// 2) backup original WAL file
			corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile())
			if err := cmtos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil {
				return err
			}

			cs.Logger.Debug("backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile)

			// 3) try to repair (WAL file will be overwritten!)
			if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil {
				cs.Logger.Error("the WAL repair failed", "err", err)
				return err
			}

			cs.Logger.Info("successful WAL repair")

			// reload WAL file, then loop to retry the replay
			if err := cs.loadWalFile(); err != nil {
				return err
			}
		}
	}

	if err := cs.evsw.Start(); err != nil {
		return err
	}

	// Double Signing Risk Reduction
	if err := cs.checkDoubleSigningRisk(cs.Height); err != nil {
		return err
	}

	// now start the receiveRoutine (0 means no step limit)
	go cs.receiveRoutine(0)

	// schedule the first round!
	// use GetRoundState so we don't race the receiveRoutine for access
	cs.scheduleRound0(cs.GetRoundState())

	return nil
}

// startRoutines starts the timeout ticker and then the receive routine,
// passing maxSteps through to receiveRoutine. If the ticker fails to start,
// the error is logged and the receive routine is not started.
//
// timeoutRoutine: receive requests for timeouts on tickChan and fire timeouts on tockChan
// receiveRoutine: serializes processing of proposals, block parts, votes; coordinates state transitions
func (cs *State) startRoutines(maxSteps int) {
	err := cs.timeoutTicker.Start()
	if err != nil {
		cs.Logger.Error("failed to start timeout ticker", "err", err)
		return
	}

	go cs.receiveRoutine(maxSteps)
}

// loadWalFile loads WAL data from file. It overwrites cs.wal.
func (cs *State) loadWalFile() error {
	wal, err := cs.OpenWAL(cs.config.WalFile())
	if err != nil {
		cs.Logger.Error("failed to load state WAL", "err", err)
		return err
	}

	cs.wal = wal
	return nil
}

// OnStop implements service.Service.
// It stops the event switch and the timeout ticker; failures are only logged.
func (cs *State) OnStop() {
	if err := cs.evsw.Stop(); err != nil {
		cs.Logger.Error("failed trying to stop eventSwitch", "error", err)
	}

	if err := cs.timeoutTicker.Stop(); err != nil {
		cs.Logger.Error("failed trying to stop timeoutTicket", "error", err)
	}
	// WAL is stopped in receiveRoutine.
}

// Wait waits for the main routine to return.
// NOTE: be sure to Stop() the event switch and drain
// any event channels or this may deadlock
func (cs *State) Wait() {
	<-cs.done
}

// OpenWAL opens a file to log all consensus messages and timeouts for
// deterministic accountability.
437 func (cs *State) OpenWAL(walFile string) (WAL, error) { 438 wal, err := NewWAL(walFile) 439 if err != nil { 440 cs.Logger.Error("failed to open WAL", "file", walFile, "err", err) 441 return nil, err 442 } 443 444 wal.SetLogger(cs.Logger.With("wal", walFile)) 445 446 if err := wal.Start(); err != nil { 447 cs.Logger.Error("failed to start WAL", "err", err) 448 return nil, err 449 } 450 451 return wal, nil 452 } 453 454 //------------------------------------------------------------ 455 // Public interface for passing messages into the consensus state, possibly causing a state transition. 456 // If peerID == "", the msg is considered internal. 457 // Messages are added to the appropriate queue (peer or internal). 458 // If the queue is full, the function may block. 459 // TODO: should these return anything or let callers just use events? 460 461 // AddVote inputs a vote. 462 func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 463 if peerID == "" { 464 cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""} 465 } else { 466 cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID} 467 } 468 469 // TODO: wait for event?! 470 return false, nil 471 } 472 473 // SetProposal inputs a proposal. 474 func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { 475 if peerID == "" { 476 cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""} 477 } else { 478 cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID} 479 } 480 481 // TODO: wait for event?! 482 return nil 483 } 484 485 // AddProposalBlockPart inputs a part of the proposal block. 486 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { 487 if peerID == "" { 488 cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""} 489 } else { 490 cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID} 491 } 492 493 // TODO: wait for event?! 
494 return nil 495 } 496 497 // SetProposalAndBlock inputs the proposal and all block parts. 498 func (cs *State) SetProposalAndBlock( 499 proposal *types.Proposal, 500 block *types.Block, 501 parts *types.PartSet, 502 peerID p2p.ID, 503 ) error { 504 if err := cs.SetProposal(proposal, peerID); err != nil { 505 return err 506 } 507 508 for i := 0; i < int(parts.Total()); i++ { 509 part := parts.GetPart(i) 510 if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { 511 return err 512 } 513 } 514 515 return nil 516 } 517 518 //------------------------------------------------------------ 519 // internal functions for managing the state 520 521 func (cs *State) updateHeight(height int64) { 522 cs.metrics.Height.Set(float64(height)) 523 cs.Height = height 524 } 525 526 func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { 527 if !cs.replayMode { 528 if round != cs.Round || round == 0 && step == cstypes.RoundStepNewRound { 529 cs.metrics.MarkRound(cs.Round, cs.StartTime) 530 } 531 if cs.Step != step { 532 cs.metrics.MarkStep(cs.Step) 533 } 534 } 535 cs.Round = round 536 cs.Step = step 537 } 538 539 // enterNewRound(height, 0) at cs.StartTime. 
540 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 541 // cs.Logger.Info("scheduleRound0", "now", cmttime.Now(), "startTime", cs.StartTime) 542 sleepDuration := rs.StartTime.Sub(cmttime.Now()) 543 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 544 } 545 546 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 547 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { 548 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) 549 } 550 551 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 552 func (cs *State) sendInternalMessage(mi msgInfo) { 553 select { 554 case cs.internalMsgQueue <- mi: 555 default: 556 // NOTE: using the go-routine means our votes can 557 // be processed out of order. 558 // TODO: use CList here for strict determinism and 559 // attempt push to internalMsgQueue in receiveRoutine 560 cs.Logger.Debug("internal msg queue is full; using a go-routine") 561 go func() { cs.internalMsgQueue <- mi }() 562 } 563 } 564 565 // Reconstruct LastCommit from SeenCommit, which we saved along with the block, 566 // (which happens even before saving the state) 567 func (cs *State) reconstructLastCommit(state sm.State) { 568 seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) 569 if seenCommit == nil { 570 panic(fmt.Sprintf( 571 "failed to reconstruct last commit; seen commit for height %v not found", 572 state.LastBlockHeight, 573 )) 574 } 575 576 lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) 577 if !lastPrecommits.HasTwoThirdsMajority() { 578 panic("failed to reconstruct last commit; does not have +2/3 maj") 579 } 580 581 cs.LastCommit = lastPrecommits 582 } 583 584 // Updates State and increments height to match that of state. 585 // The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. 
func (cs *State) updateToState(state sm.State) {
	// After a commit (CommitRound > -1) the incoming state must be for the
	// height we just finished.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf(
			"updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight,
		))
	}

	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height,
			))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight,
			))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Debug(
				"ignoring updateToState()",
				"new_height", state.LastBlockHeight+1,
				"old_height", cs.state.LastBlockHeight+1,
			)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	// Decide what LastCommit should be for the new height.
	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf(
				"wanted to form a commit, but precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight, cs.CommitRound, cs.Votes.Precommits(cs.CommitRound),
			))
		}

		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)

	case cs.LastCommit == nil:
		// NOTE: when consensus starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf(
			"last commit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height; a computed height of 1 is replaced by the
	// chain's configured initial height.
	height := state.LastBlockHeight + 1
	if height == 1 {
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)

	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// An alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(cmttime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	// Clear all per-height proposal, lock and vote bookkeeping.
	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}

// newStep records the current round state in the WAL, counts the step, and,
// once an event bus has been set, publishes the new round step on both the
// event bus and the event switch. WAL and publish failures are only logged.
func (cs *State) newStep() {
	rs := cs.RoundStateEvent()
	if err := cs.wal.Write(rs); err != nil {
		cs.Logger.Error("failed writing to WAL", "err", err)
	}

	cs.nSteps++

	// newStep is called by updateToState in NewState before the eventBus is set!
	if cs.eventBus != nil {
		if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
			cs.Logger.Error("failed publishing new round step", "err", err)
		}

		cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState)
	}
}

//-----------------------------------------
// the main go routines

// receiveRoutine handles messages which may cause state transitions.
// Its argument (maxSteps) bounds the number of steps (as counted by cs.nSteps)
// to take before exiting - use 0 to run forever.
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
func (cs *State) receiveRoutine(maxSteps int) {
	// onExit stops the WAL, waits for it, and signals completion via cs.done.
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but it's ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("failed trying to stop WAL", "error", err)
		}

		cs.wal.Wait()
		close(cs.done)
	}

	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		// With a step limit, return (without running onExit) once it is reached.
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Debug("reached max steps; exiting receive routine")
				cs.nSteps = 0
				return
			}
		}

		// Snapshot of the round state taken before blocking in select; it is
		// what handleTimeout compares an incoming timeout against.
		// NOTE(review): read without holding cs.mtx — presumably fine because
		// this routine is the only writer; confirm.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()

		case mi = <-cs.peerMsgQueue:
			// Peer messages: a WAL write failure is logged, not fatal.
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)

		case mi = <-cs.internalMsgQueue:
			// Our own messages must reach the WAL before being processed.
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf(
					"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
					mi, err,
				))
			}

			if _, ok := mi.Msg.(*VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)

		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)

		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}

// handleMsg dispatches a proposal, block part or vote message under cs.mtx.
// state transitions on complete-proposal, 2/3-any, 2/3-one
// Processing errors are logged, not returned; unknown message types are
// logged and dropped.
func (cs *State) handleMsg(mi msgInfo) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()
	var (
		added bool
		err   error
	)

	msg, peerID := mi.Msg, mi.PeerID

	switch msg := msg.(type) {
	case *ProposalMessage:
		// will not cause transition.
		// once proposal is set, we can receive block parts
		err = cs.setProposal(msg.Proposal)

	case *BlockPartMessage:
		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
		added, err = cs.addProposalBlockPart(msg, peerID)

		// We unlock here to yield to any routines that need to read the RoundState.
		// Previously, this code held the lock from the point at which the final block
		// part was received until the block executed against the application.
		// This prevented the reactor from being able to retrieve the most updated
		// version of the RoundState. The reactor needs the updated RoundState to
		// gossip the now completed block.
		//
		// This code can be further improved by either always operating on a copy
		// of RoundState and only locking when switching out State's copy of
		// RoundState with the updated copy or by emitting RoundState events in
		// more places for routines depending on it to listen for.
		cs.mtx.Unlock()

		cs.mtx.Lock()
		if added && cs.ProposalBlockParts.IsComplete() {
			cs.handleCompleteProposal(msg.Height)
		}
		if added {
			// report the accepted part so the reactor can compute statistics
			cs.statsMsgQueue <- mi
		}

		// An error for a part from a different round is downgraded to a debug log.
		if err != nil && msg.Round != cs.Round {
			cs.Logger.Debug(
				"received block part from wrong round",
				"height", cs.Height,
				"cs_round", cs.Round,
				"block_round", msg.Round,
			)
			err = nil
		}

	case *VoteMessage:
		// attempt to add the vote and dupeout the validator if it's a duplicate signature
		// if the vote gives us a 2/3-any or 2/3-one, we transition
		added, err = cs.tryAddVote(msg.Vote, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// if err == ErrAddingVote {
		// TODO: punish peer
		// We probably don't want to stop the peer here. The vote does not
		// necessarily come from a malicious peer but can be just broadcasted by
		// a typical peer.
		// https://github.com/tendermint/tendermint/issues/1281
		// }

		// NOTE: the vote is broadcast to peers by the reactor listening
		// for vote events

		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
		// the peer is sending us CatchupCommit precommits.
		// We could make note of this and help filter in broadcastHasVoteMessage().

	default:
		cs.Logger.Error("unknown msg type", "type", fmt.Sprintf("%T", msg))
		return
	}

	if err != nil {
		cs.Logger.Error(
			"failed to process message",
			"height", cs.Height,
			"round", cs.Round,
			"peer", peerID,
			"msg_type", fmt.Sprintf("%T", msg),
			"err", err,
		)
	}
}

// handleTimeout applies a fired timeout to the state machine. rs is the
// round-state snapshot to compare against; timeouts for another height, an
// earlier round, or an earlier step of the same round are ignored. It panics
// on a timeout step it does not know how to handle.
func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
	cs.Logger.Debug("received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)

	// timeouts must be for current height, round, step
	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
		cs.Logger.Debug("ignoring tock because we are ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
		return
	}

	// the timeout will now cause a state transition
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	switch ti.Step {
	case cstypes.RoundStepNewHeight:
		// NewRound event fired from enterNewRound.
		// XXX: should we fire timeout here (for timeout commit)?
		cs.enterNewRound(ti.Height, 0)

	case cstypes.RoundStepNewRound:
		cs.enterPropose(ti.Height, ti.Round)

	case cstypes.RoundStepPropose:
		if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout propose", "err", err)
		}

		cs.enterPrevote(ti.Height, ti.Round)

	case cstypes.RoundStepPrevoteWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		cs.enterPrecommit(ti.Height, ti.Round)

	case cstypes.RoundStepPrecommitWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		// precommit for this round, then move on to the next round
		cs.enterPrecommit(ti.Height, ti.Round)
		cs.enterNewRound(ti.Height, ti.Round+1)

	default:
		panic(fmt.Sprintf("invalid timeout step: %v", ti.Step))
	}
}

// handleTxsAvailable reacts to the mempool signalling available transactions.
// It only acts in round 0: during the NewHeight step it schedules a NewRound
// timeout (unless a proof block is needed), and during the NewRound step it
// enters propose directly.
func (cs *State) handleTxsAvailable() {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	// We only need to do this for round 0.
	if cs.Round != 0 {
		return
	}

	switch cs.Step {
	case cstypes.RoundStepNewHeight: // timeoutCommit phase
		if cs.needProofBlock(cs.Height) {
			// enterPropose will be called by enterNewRound
			return
		}

		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
		timeoutCommit := cs.StartTime.Sub(cmttime.Now()) + 1*time.Millisecond
		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)

	case cstypes.RoundStepNewRound: // after timeoutCommit
		cs.enterPropose(cs.Height, 0)
	}
}

//-----------------------------------------------------------------------------
// State functions
// Used internally by handleTimeout and handleMsg to make state transitions

// Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit),
//
//	or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1)
//
// Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1)
// Enter: +2/3 precommits for nil at (height,round-1)
// Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round)
// NOTE: cs.StartTime was already set for height.
988 func (cs *State) enterNewRound(height int64, round int32) { 989 logger := cs.Logger.With("height", height, "round", round) 990 991 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { 992 logger.Debug( 993 "entering new round with invalid args", 994 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 995 ) 996 return 997 } 998 999 if now := cmttime.Now(); cs.StartTime.After(now) { 1000 logger.Debug("need to set a buffer and log message here for sanity", "start_time", cs.StartTime, "now", now) 1001 } 1002 1003 prevHeight, prevRound, prevStep := cs.Height, cs.Round, cs.Step 1004 1005 // increment validators if necessary 1006 validators := cs.Validators 1007 if cs.Round < round { 1008 validators = validators.Copy() 1009 validators.IncrementProposerPriority(cmtmath.SafeSubInt32(round, cs.Round)) 1010 } 1011 1012 // Setup new round 1013 // we don't fire newStep for this step, 1014 // but we fire an event, so update the round step first 1015 cs.updateRoundStep(round, cstypes.RoundStepNewRound) 1016 cs.Validators = validators 1017 propAddress := validators.GetProposer().PubKey.Address() 1018 if round == 0 { 1019 // We've already reset these upon new height, 1020 // and meanwhile we might have received a proposal 1021 // for round 0. 
1022 } else { 1023 logger.Info("resetting proposal info", "proposer", propAddress) 1024 cs.Proposal = nil 1025 cs.ProposalBlock = nil 1026 cs.ProposalBlockParts = nil 1027 } 1028 1029 logger.Debug("entering new round", 1030 "previous", log.NewLazySprintf("%v/%v/%v", prevHeight, prevRound, prevStep), 1031 "proposer", propAddress, 1032 ) 1033 1034 cs.Votes.SetRound(cmtmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping 1035 cs.TriggeredTimeoutPrecommit = false 1036 1037 if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { 1038 cs.Logger.Error("failed publishing new round", "err", err) 1039 } 1040 // Wait for txs to be available in the mempool 1041 // before we enterPropose in round 0. If the last block changed the app hash, 1042 // we may need an empty "proof" block, and enterPropose immediately. 1043 waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) 1044 if waitForTxs { 1045 if cs.config.CreateEmptyBlocksInterval > 0 { 1046 cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, 1047 cstypes.RoundStepNewRound) 1048 } 1049 } else { 1050 cs.enterPropose(height, round) 1051 } 1052 } 1053 1054 // needProofBlock returns true on the first height (so the genesis app hash is signed right away) 1055 // and where the last block (height-1) caused the app hash to change 1056 func (cs *State) needProofBlock(height int64) bool { 1057 if height == cs.state.InitialHeight { 1058 return true 1059 } 1060 1061 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1062 if lastBlockMeta == nil { 1063 // See https://github.com/cometbft/cometbft/issues/370 1064 cs.Logger.Info("short-circuited needProofBlock", "height", height, "InitialHeight", cs.state.InitialHeight) 1065 return true 1066 } 1067 1068 return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) 1069 } 1070 1071 // Enter (CreateEmptyBlocks): from enterNewRound(height,round) 1072 // Enter (CreateEmptyBlocks, 
CreateEmptyBlocksInterval > 0 ): 1073 // 1074 // after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval 1075 // 1076 // Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool 1077 func (cs *State) enterPropose(height int64, round int32) { 1078 logger := cs.Logger.With("height", height, "round", round) 1079 1080 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) { 1081 logger.Debug( 1082 "entering propose step with invalid args", 1083 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1084 ) 1085 return 1086 } 1087 1088 logger.Debug("entering propose step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1089 1090 defer func() { 1091 // Done enterPropose: 1092 cs.updateRoundStep(round, cstypes.RoundStepPropose) 1093 cs.newStep() 1094 1095 // If we have the whole proposal + POL, then goto Prevote now. 1096 // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart), 1097 // or else after timeoutPropose 1098 if cs.isProposalComplete() { 1099 cs.enterPrevote(height, cs.Round) 1100 } 1101 }() 1102 1103 // If we don't get the proposal and all block parts quick enough, enterPrevote 1104 cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose) 1105 1106 // Nothing more to do if we're not a validator 1107 if cs.privValidator == nil { 1108 logger.Debug("node is not a validator") 1109 return 1110 } 1111 1112 logger.Debug("node is a validator") 1113 1114 if cs.privValidatorPubKey == nil { 1115 // If this node is a validator & proposer in the current round, it will 1116 // miss the opportunity to create a block. 
1117 logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1118 return 1119 } 1120 1121 address := cs.privValidatorPubKey.Address() 1122 1123 // if not a validator, we're done 1124 if !cs.Validators.HasAddress(address) { 1125 logger.Debug("node is not a validator", "addr", address, "vals", cs.Validators) 1126 return 1127 } 1128 1129 if cs.isProposer(address) { 1130 logger.Debug("propose step; our turn to propose", "proposer", address) 1131 cs.decideProposal(height, round) 1132 } else { 1133 logger.Debug("propose step; not our turn to propose", "proposer", cs.Validators.GetProposer().Address) 1134 } 1135 } 1136 1137 func (cs *State) isProposer(address []byte) bool { 1138 return bytes.Equal(cs.Validators.GetProposer().Address, address) 1139 } 1140 1141 func (cs *State) defaultDecideProposal(height int64, round int32) { 1142 var block *types.Block 1143 var blockParts *types.PartSet 1144 1145 // Decide on block 1146 if cs.ValidBlock != nil { 1147 // If there is valid block, choose that. 1148 block, blockParts = cs.ValidBlock, cs.ValidBlockParts 1149 } else { 1150 // Create a new proposal block from state/txs from the mempool. 1151 var err error 1152 block, err = cs.createProposalBlock() 1153 if err != nil { 1154 cs.Logger.Error("unable to create proposal block", "error", err) 1155 return 1156 } else if block == nil { 1157 panic("Method createProposalBlock should not provide a nil block without errors") 1158 } 1159 cs.metrics.ProposalCreateCount.Add(1) 1160 blockParts, err = block.MakePartSet(types.BlockPartSizeBytes) 1161 if err != nil { 1162 cs.Logger.Error("unable to create proposal block part set", "error", err) 1163 return 1164 } 1165 } 1166 1167 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1168 // and the privValidator will refuse to sign anything. 
1169 if err := cs.wal.FlushAndSync(); err != nil { 1170 cs.Logger.Error("failed flushing WAL to disk") 1171 } 1172 1173 // Make proposal 1174 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1175 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1176 p := proposal.ToProto() 1177 if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { 1178 proposal.Signature = p.Signature 1179 1180 // send proposal and block parts on internal msg queue 1181 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 1182 1183 for i := 0; i < int(blockParts.Total()); i++ { 1184 part := blockParts.GetPart(i) 1185 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 1186 } 1187 1188 cs.Logger.Debug("signed proposal", "height", height, "round", round, "proposal", proposal) 1189 } else if !cs.replayMode { 1190 cs.Logger.Error("propose step; failed signing proposal", "height", height, "round", round, "err", err) 1191 } 1192 } 1193 1194 // Returns true if the proposal block is complete && 1195 // (if POLRound was proposed, we have +2/3 prevotes from there). 1196 func (cs *State) isProposalComplete() bool { 1197 if cs.Proposal == nil || cs.ProposalBlock == nil { 1198 return false 1199 } 1200 // we have the proposal. if there's a POLRound, 1201 // make sure we have the prevotes from it too 1202 if cs.Proposal.POLRound < 0 { 1203 return true 1204 } 1205 // if this is false the proposer is lying or we haven't received the POL yet 1206 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1207 } 1208 1209 // Create the next block to propose and return it. Returns nil block upon error. 1210 // 1211 // We really only need to return the parts, but the block is returned for 1212 // convenience so we can log the proposal block. 1213 // 1214 // NOTE: keep it side-effect free for clarity. 1215 // CONTRACT: cs.privValidator is not nil. 
1216 func (cs *State) createProposalBlock() (*types.Block, error) { 1217 if cs.privValidator == nil { 1218 return nil, errors.New("entered createProposalBlock with privValidator being nil") 1219 } 1220 1221 var commit *types.Commit 1222 switch { 1223 case cs.Height == cs.state.InitialHeight: 1224 // We're creating a proposal for the first block. 1225 // The commit is empty, but not nil. 1226 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1227 1228 case cs.LastCommit.HasTwoThirdsMajority(): 1229 // Make the commit from LastCommit 1230 commit = cs.LastCommit.MakeCommit() 1231 1232 default: // This shouldn't happen. 1233 return nil, errors.New("propose step; cannot propose anything without commit for the previous block") 1234 } 1235 1236 if cs.privValidatorPubKey == nil { 1237 // If this node is a validator & proposer in the current round, it will 1238 // miss the opportunity to create a block. 1239 return nil, fmt.Errorf("propose step; empty priv validator public key, error: %w", errPubKeyIsNotSet) 1240 } 1241 1242 proposerAddr := cs.privValidatorPubKey.Address() 1243 1244 ret, err := cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1245 if err != nil { 1246 panic(err) 1247 } 1248 return ret, nil 1249 } 1250 1251 // Enter: `timeoutPropose` after entering Propose. 1252 // Enter: proposal block and POL is ready. 1253 // Prevote for LockedBlock if we're locked, or ProposalBlock if valid. 1254 // Otherwise vote nil. 
1255 func (cs *State) enterPrevote(height int64, round int32) { 1256 logger := cs.Logger.With("height", height, "round", round) 1257 1258 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 1259 logger.Debug( 1260 "entering prevote step with invalid args", 1261 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1262 ) 1263 return 1264 } 1265 1266 defer func() { 1267 // Done enterPrevote: 1268 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 1269 cs.newStep() 1270 }() 1271 1272 logger.Debug("entering prevote step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1273 1274 // Sign and broadcast vote as necessary 1275 cs.doPrevote(height, round) 1276 1277 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 1278 // (so we have more time to try and collect +2/3 prevotes for a single block) 1279 } 1280 1281 func (cs *State) defaultDoPrevote(height int64, round int32) { 1282 logger := cs.Logger.With("height", height, "round", round) 1283 1284 // If a block is locked, prevote that. 1285 if cs.LockedBlock != nil { 1286 logger.Debug("prevote step; already locked on a block; prevoting locked block") 1287 cs.signAddVote(cmtproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) 1288 return 1289 } 1290 1291 // If ProposalBlock is nil, prevote nil. 1292 if cs.ProposalBlock == nil { 1293 logger.Debug("prevote step: ProposalBlock is nil") 1294 cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{}) 1295 return 1296 } 1297 1298 // Validate proposal block, from consensus' perspective 1299 err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) 1300 if err != nil { 1301 // ProposalBlock is invalid, prevote nil. 
1302 logger.Error("prevote step: consensus deems this block invalid; prevoting nil", 1303 "err", err) 1304 cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{}) 1305 return 1306 } 1307 1308 /* 1309 Before prevoting on the block received from the proposer for the current round and height, 1310 we request the Application, via `ProcessProposal` ABCI call, to confirm that the block is 1311 valid. If the Application does not accept the block, consensus prevotes `nil`. 1312 1313 WARNING: misuse of block rejection by the Application can seriously compromise 1314 the liveness properties of consensus. 1315 Please see `PrepareProosal`-`ProcessProposal` coherence and determinism properties 1316 in the ABCI++ specification. 1317 */ 1318 isAppValid, err := cs.blockExec.ProcessProposal(cs.ProposalBlock, cs.state) 1319 if err != nil { 1320 panic(fmt.Sprintf( 1321 "state machine returned an error (%v) when calling ProcessProposal", err, 1322 )) 1323 } 1324 cs.metrics.MarkProposalProcessed(isAppValid) 1325 1326 // Vote nil if the Application rejected the block 1327 if !isAppValid { 1328 logger.Error("prevote step: state machine rejected a proposed block; this should not happen:"+ 1329 "the proposer may be misbehaving; prevoting nil", "err", err) 1330 cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{}) 1331 return 1332 } 1333 1334 // Prevote cs.ProposalBlock 1335 // NOTE: the proposal signature is validated when it is received, 1336 // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) 1337 logger.Debug("prevote step: ProposalBlock is valid") 1338 cs.signAddVote(cmtproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) 1339 } 1340 1341 // Enter: any +2/3 prevotes at next round. 
1342 func (cs *State) enterPrevoteWait(height int64, round int32) { 1343 logger := cs.Logger.With("height", height, "round", round) 1344 1345 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1346 logger.Debug( 1347 "entering prevote wait step with invalid args", 1348 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1349 ) 1350 return 1351 } 1352 1353 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1354 panic(fmt.Sprintf( 1355 "entering prevote wait step (%v/%v), but prevotes does not have any +2/3 votes", 1356 height, round, 1357 )) 1358 } 1359 1360 logger.Debug("entering prevote wait step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1361 1362 defer func() { 1363 // Done enterPrevoteWait: 1364 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1365 cs.newStep() 1366 }() 1367 1368 // Wait for some more prevotes; enterPrecommit 1369 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1370 } 1371 1372 // Enter: `timeoutPrevote` after any +2/3 prevotes. 1373 // Enter: `timeoutPrecommit` after any +2/3 precommits. 1374 // Enter: +2/3 precomits for block or nil. 1375 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 1376 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 1377 // else, precommit nil otherwise. 
func (cs *State) enterPrecommit(height int64, round int32) {
	// Signs exactly one precommit for (height, round) on every path that gets
	// past the guard below: for the polka block when we are locked on it or
	// hold it as the proposal block, and for nil in every other case. On exit
	// the step becomes RoundStepPrecommit and newStep fires.
	logger := cs.Logger.With("height", height, "round", round)

	// Ignore calls for another height, an older round, or a round that is
	// already at or past the precommit step.
	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) {
		logger.Debug(
			"entering precommit step with invalid args",
			"current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	logger.Debug("entering precommit step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step))

	defer func() {
		// Done enterPrecommit:
		cs.updateRoundStep(round, cstypes.RoundStepPrecommit)
		cs.newStep()
	}()

	// check for a polka
	blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority()

	// If we don't have a polka, we must precommit nil.
	if !ok {
		// The two branches differ only in the log message; any existing lock
		// is left untouched here.
		if cs.LockedBlock != nil {
			logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit while we are locked; precommitting nil")
		} else {
			logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit; precommitting nil")
		}

		cs.signAddVote(cmtproto.PrecommitType, nil, types.PartSetHeader{})
		return
	}

	// At this point +2/3 prevoted for a particular block or nil.
	if err := cs.eventBus.PublishEventPolka(cs.RoundStateEvent()); err != nil {
		logger.Error("failed publishing polka", "err", err)
	}

	// the latest POLRound should be this round.
	polRound, _ := cs.Votes.POLInfo()
	if polRound < round {
		panic(fmt.Sprintf("this POLRound should be %v but got %v", round, polRound))
	}

	// +2/3 prevoted nil. Unlock and precommit nil.
	if len(blockID.Hash) == 0 {
		if cs.LockedBlock == nil {
			logger.Debug("precommit step; +2/3 prevoted for nil")
		} else {
			logger.Debug("precommit step; +2/3 prevoted for nil; unlocking")
			cs.LockedRound = -1
			cs.LockedBlock = nil
			cs.LockedBlockParts = nil

			if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil {
				logger.Error("failed publishing event unlock", "err", err)
			}
		}

		cs.signAddVote(cmtproto.PrecommitType, nil, types.PartSetHeader{})
		return
	}

	// At this point, +2/3 prevoted for a particular block.

	// If we're already locked on that block, precommit it, and update the LockedRound
	if cs.LockedBlock.HashesTo(blockID.Hash) {
		logger.Debug("precommit step; +2/3 prevoted locked block; relocking")
		cs.LockedRound = round

		if err := cs.eventBus.PublishEventRelock(cs.RoundStateEvent()); err != nil {
			logger.Error("failed publishing event relock", "err", err)
		}

		cs.signAddVote(cmtproto.PrecommitType, blockID.Hash, blockID.PartSetHeader)
		return
	}

	// If +2/3 prevoted for proposal block, stage and precommit it
	if cs.ProposalBlock.HashesTo(blockID.Hash) {
		logger.Debug("precommit step; +2/3 prevoted proposal block; locking", "hash", blockID.Hash)

		// Validate the block.
		// A polka for a block that fails validation is treated as fatal.
		if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil {
			panic(fmt.Sprintf("precommit step; +2/3 prevoted for an invalid block: %v", err))
		}

		cs.LockedRound = round
		cs.LockedBlock = cs.ProposalBlock
		cs.LockedBlockParts = cs.ProposalBlockParts

		if err := cs.eventBus.PublishEventLock(cs.RoundStateEvent()); err != nil {
			logger.Error("failed publishing event lock", "err", err)
		}

		cs.signAddVote(cmtproto.PrecommitType, blockID.Hash, blockID.PartSetHeader)
		return
	}

	// There was a polka in this round for a block we don't have.
	// Fetch that block, unlock, and precommit nil.
	// The +2/3 prevotes for this round is the POL for our unlock.
	logger.Debug("precommit step; +2/3 prevotes for a block we do not have; voting nil", "block_id", blockID)

	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil

	// Start collecting parts for the polka block unless the current part set
	// already carries its header.
	if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) {
		cs.ProposalBlock = nil
		cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader)
	}

	if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil {
		logger.Error("failed publishing event unlock", "err", err)
	}

	cs.signAddVote(cmtproto.PrecommitType, nil, types.PartSetHeader{})
}

// Enter: any +2/3 precommits for next round.
1500 func (cs *State) enterPrecommitWait(height int64, round int32) { 1501 logger := cs.Logger.With("height", height, "round", round) 1502 1503 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1504 logger.Debug( 1505 "entering precommit wait step with invalid args", 1506 "triggered_timeout", cs.TriggeredTimeoutPrecommit, 1507 "current", log.NewLazySprintf("%v/%v", cs.Height, cs.Round), 1508 ) 1509 return 1510 } 1511 1512 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1513 panic(fmt.Sprintf( 1514 "entering precommit wait step (%v/%v), but precommits does not have any +2/3 votes", 1515 height, round, 1516 )) 1517 } 1518 1519 logger.Debug("entering precommit wait step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1520 1521 defer func() { 1522 // Done enterPrecommitWait: 1523 cs.TriggeredTimeoutPrecommit = true 1524 cs.newStep() 1525 }() 1526 1527 // wait for some more precommits; enterNewRound 1528 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1529 } 1530 1531 // Enter: +2/3 precommits for block 1532 func (cs *State) enterCommit(height int64, commitRound int32) { 1533 logger := cs.Logger.With("height", height, "commit_round", commitRound) 1534 1535 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1536 logger.Debug( 1537 "entering commit step with invalid args", 1538 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1539 ) 1540 return 1541 } 1542 1543 logger.Debug("entering commit step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1544 1545 defer func() { 1546 // Done enterCommit: 1547 // keep cs.Round the same, commitRound points to the right Precommits set. 1548 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1549 cs.CommitRound = commitRound 1550 cs.CommitTime = cmttime.Now() 1551 cs.newStep() 1552 1553 // Maybe finalize immediately. 
1554 cs.tryFinalizeCommit(height) 1555 }() 1556 1557 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1558 if !ok { 1559 panic("RunActionCommit() expects +2/3 precommits") 1560 } 1561 1562 // The Locked* fields no longer matter. 1563 // Move them over to ProposalBlock if they match the commit hash, 1564 // otherwise they'll be cleared in updateToState. 1565 if cs.LockedBlock.HashesTo(blockID.Hash) { 1566 logger.Debug("commit is for a locked block; set ProposalBlock=LockedBlock", "block_hash", blockID.Hash) 1567 cs.ProposalBlock = cs.LockedBlock 1568 cs.ProposalBlockParts = cs.LockedBlockParts 1569 } 1570 1571 // If we don't have the block being committed, set up to get it. 1572 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1573 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1574 logger.Info( 1575 "commit is for a block we do not know about; set ProposalBlock=nil", 1576 "proposal", log.NewLazyBlockHash(cs.ProposalBlock), 1577 "commit", blockID.Hash, 1578 ) 1579 1580 // We're getting the wrong block. 1581 // Set up ProposalBlockParts and keep waiting. 1582 cs.ProposalBlock = nil 1583 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1584 1585 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1586 logger.Error("failed publishing valid block", "err", err) 1587 } 1588 1589 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 1590 } 1591 } 1592 } 1593 1594 // If we have the block AND +2/3 commits for it, finalize. 
func (cs *State) tryFinalizeCommit(height int64) {
	// Calls finalizeCommit only when the precommits of cs.CommitRound have a
	// +2/3 majority for a non-nil block and cs.ProposalBlock is that block;
	// otherwise it logs and returns. Panics if height is not cs.Height.
	logger := cs.Logger.With("height", height)

	if cs.Height != height {
		panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height))
	}

	blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
	if !ok || len(blockID.Hash) == 0 {
		logger.Error("failed attempt to finalize commit; there was no +2/3 majority or +2/3 was for nil")
		return
	}

	if !cs.ProposalBlock.HashesTo(blockID.Hash) {
		// TODO: this happens every time if we're not a validator (ugly logs)
		// TODO: ^^ wait, why does it matter that we're a validator?
		logger.Debug(
			"failed attempt to finalize commit; we do not have the commit block",
			"proposal_block", log.NewLazyBlockHash(cs.ProposalBlock),
			"commit_block", blockID.Hash,
		)
		return
	}

	cs.finalizeCommit(height)
}

// Increment height and goto cstypes.RoundStepNewHeight
//
// finalizeCommit persists and executes the committed block, in this order:
// save the block to the block store (unless already stored), write the
// EndHeightMessage to the WAL, apply the block through the block executor,
// prune if the application asked for it, record metrics, switch to the new
// state, refresh the private validator's public key and schedule round 0.
//
// It returns without doing anything unless cs.Height == height and the step is
// RoundStepCommit. It panics if the commit invariants do not hold, if the
// block is invalid, if the WAL write fails or if ApplyBlock fails.
func (cs *State) finalizeCommit(height int64) {
	logger := cs.Logger.With("height", height)

	// NOTE(review): this is the rejection path (wrong height or wrong step),
	// yet the message reads "entering finalize commit step".
	if cs.Height != height || cs.Step != cstypes.RoundStepCommit {
		logger.Debug(
			"entering finalize commit step",
			"current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	cs.calculatePrevoteMessageDelayMetrics()

	blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority()
	block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts

	// Invariants: a +2/3 majority exists and the proposal block and its parts
	// are exactly the committed ones.
	if !ok {
		panic("cannot finalize commit; commit does not have 2/3 majority")
	}
	if !blockParts.HasHeader(blockID.PartSetHeader) {
		panic("expected ProposalBlockParts header to be commit header")
	}
	if !block.HashesTo(blockID.Hash) {
		panic("cannot finalize commit; proposal block does not hash to commit hash")
	}

	if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil {
		panic(fmt.Errorf("+2/3 committed an invalid block: %w", err))
	}

	logger.Info(
		"finalizing commit of block",
		"hash", log.NewLazyBlockHash(block),
		"root", block.AppHash,
		"num_txs", len(block.Txs),
	)
	logger.Debug("committed block", "block", log.NewLazySprintf("%v", block))

	// The fail.Fail() calls mark points between the individual persistence
	// steps (presumably crash-injection hooks for recovery testing; confirm in
	// libs/fail).
	fail.Fail() // XXX

	// Save to blockStore.
	if cs.blockStore.Height() < block.Height {
		// NOTE: the seenCommit is local justification to commit this block,
		// but may differ from the LastCommit included in the next block
		precommits := cs.Votes.Precommits(cs.CommitRound)
		seenCommit := precommits.MakeCommit()
		cs.blockStore.SaveBlock(block, blockParts, seenCommit)
	} else {
		// Happens during replay if we already saved the block but didn't commit
		logger.Debug("calling finalizeCommit on already stored block", "height", block.Height)
	}

	fail.Fail() // XXX

	// Write EndHeightMessage{} for this height, implying that the blockstore
	// has saved the block.
	//
	// If we crash before writing this EndHeightMessage{}, we will recover by
	// running ApplyBlock during the ABCI handshake when we restart. If we
	// didn't save the block to the blockstore before writing
	// EndHeightMessage{}, we'd have to change WAL replay -- currently it
	// complains about replaying for heights where an #ENDHEIGHT entry already
	// exists.
	//
	// Either way, the State should not be resumed until we
	// successfully call ApplyBlock (ie. later here, or in Handshake after
	// restart).
	endMsg := EndHeightMessage{height}
	if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync
		panic(fmt.Sprintf(
			"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
			endMsg, err,
		))
	}

	fail.Fail() // XXX

	// Create a copy of the state for staging and an event cache for txs.
	stateCopy := cs.state.Copy()

	// Execute and commit the block, update and save the state, and update the mempool.
	// NOTE The block.AppHash wont reflect these txs until the next block.
	var (
		err          error
		retainHeight int64
	)

	stateCopy, retainHeight, err = cs.blockExec.ApplyBlock(
		stateCopy,
		types.BlockID{
			Hash:          block.Hash(),
			PartSetHeader: blockParts.Header(),
		},
		block,
	)
	if err != nil {
		panic(fmt.Sprintf("failed to apply block; error %v", err))
	}

	fail.Fail() // XXX

	// Prune old heights, if requested by ABCI app.
	// A pruning failure is only logged; it does not abort finalization.
	if retainHeight > 0 {
		pruned, err := cs.pruneBlocks(retainHeight)
		if err != nil {
			logger.Error("failed to prune blocks", "retain_height", retainHeight, "err", err)
		} else {
			logger.Debug("pruned blocks", "pruned", pruned, "retain_height", retainHeight)
		}
	}

	// must be called before we update state
	cs.recordMetrics(height, block)

	// NewHeightStep!
	cs.updateToState(stateCopy)

	fail.Fail() // XXX

	// Private validator might have changed its key pair => refetch pubkey.
	if err := cs.updatePrivValidatorPubKey(); err != nil {
		logger.Error("failed to get private validator pubkey", "err", err)
	}

	// cs.StartTime is already set.
	// Schedule Round0 to start soon.
	cs.scheduleRound0(&cs.RoundState)

	// By here,
	// * cs.Height has been incremented to height+1
	// * cs.Step is now cstypes.RoundStepNewHeight
	// * cs.StartTime is set to when we will start round0.
1755 } 1756 1757 func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { 1758 base := cs.blockStore.Base() 1759 if retainHeight <= base { 1760 return 0, nil 1761 } 1762 pruned, err := cs.blockStore.PruneBlocks(retainHeight) 1763 if err != nil { 1764 return 0, fmt.Errorf("failed to prune block store: %w", err) 1765 } 1766 err = cs.blockExec.Store().PruneStates(base, retainHeight) 1767 if err != nil { 1768 return 0, fmt.Errorf("failed to prune state database: %w", err) 1769 } 1770 return pruned, nil 1771 } 1772 1773 func (cs *State) recordMetrics(height int64, block *types.Block) { 1774 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1775 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1776 1777 var ( 1778 missingValidators int 1779 missingValidatorsPower int64 1780 ) 1781 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1782 // Remember that the first LastCommit is intentionally empty, so it's not 1783 // fair to increment missing validators number. 1784 if height > cs.state.InitialHeight { 1785 // Sanity check that commit size matches validator set size - only applies 1786 // after first block. 1787 var ( 1788 commitSize = block.LastCommit.Size() 1789 valSetLen = len(cs.LastValidators.Validators) 1790 address types.Address 1791 ) 1792 if commitSize != valSetLen { 1793 panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1794 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1795 } 1796 1797 if cs.privValidator != nil { 1798 if cs.privValidatorPubKey == nil { 1799 // Metrics won't be updated, but it's not critical. 
1800 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1801 } else { 1802 address = cs.privValidatorPubKey.Address() 1803 } 1804 } 1805 1806 for i, val := range cs.LastValidators.Validators { 1807 commitSig := block.LastCommit.Signatures[i] 1808 if commitSig.Absent() { 1809 missingValidators++ 1810 missingValidatorsPower += val.VotingPower 1811 } 1812 1813 if bytes.Equal(val.Address, address) { 1814 label := []string{ 1815 "validator_address", val.Address.String(), 1816 } 1817 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1818 if commitSig.ForBlock() { 1819 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1820 } else { 1821 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1822 } 1823 } 1824 1825 } 1826 } 1827 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1828 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1829 1830 // NOTE: byzantine validators power and count is only for consensus evidence i.e. 
duplicate vote 1831 var ( 1832 byzantineValidatorsPower = int64(0) 1833 byzantineValidatorsCount = int64(0) 1834 ) 1835 for _, ev := range block.Evidence.Evidence { 1836 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1837 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1838 byzantineValidatorsCount++ 1839 byzantineValidatorsPower += val.VotingPower 1840 } 1841 } 1842 } 1843 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1844 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1845 1846 if height > 1 { 1847 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1848 if lastBlockMeta != nil { 1849 cs.metrics.BlockIntervalSeconds.Observe( 1850 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1851 ) 1852 } 1853 } 1854 1855 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1856 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1857 cs.metrics.BlockSizeBytes.Set(float64(block.Size())) 1858 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1859 } 1860 1861 //----------------------------------------------------------------------------- 1862 1863 func (cs *State) defaultSetProposal(proposal *types.Proposal) error { 1864 // Already have one 1865 // TODO: possibly catch double proposals 1866 if cs.Proposal != nil { 1867 return nil 1868 } 1869 1870 // Does not apply 1871 if proposal.Height != cs.Height || proposal.Round != cs.Round { 1872 return nil 1873 } 1874 1875 // Verify POLRound, which must be -1 or in range [0, proposal.Round). 
1876 if proposal.POLRound < -1 || 1877 (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { 1878 return ErrInvalidProposalPOLRound 1879 } 1880 1881 p := proposal.ToProto() 1882 // Verify signature 1883 pubKey := cs.Validators.GetProposer().PubKey 1884 if !pubKey.VerifySignature( 1885 types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature, 1886 ) { 1887 return ErrInvalidProposalSignature 1888 } 1889 1890 proposal.Signature = p.Signature 1891 cs.Proposal = proposal 1892 // We don't update cs.ProposalBlockParts if it is already set. 1893 // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. 1894 // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! 1895 if cs.ProposalBlockParts == nil { 1896 cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) 1897 } 1898 1899 cs.Logger.Info("received proposal", "proposal", proposal, "proposer", pubKey.Address()) 1900 return nil 1901 } 1902 1903 // NOTE: block is not necessarily valid. 1904 // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, 1905 // once we have the full block. 1906 func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (added bool, err error) { 1907 height, round, part := msg.Height, msg.Round, msg.Part 1908 1909 // Blocks might be reused, so round mismatch is OK 1910 if cs.Height != height { 1911 cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) 1912 cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) 1913 return false, nil 1914 } 1915 1916 // We're not expecting a block part. 
1917 if cs.ProposalBlockParts == nil { 1918 cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) 1919 // NOTE: this can happen when we've gone to a higher round and 1920 // then receive parts from the previous round - not necessarily a bad peer. 1921 cs.Logger.Debug( 1922 "received a block part when we are not expecting any", 1923 "height", height, 1924 "round", round, 1925 "index", part.Index, 1926 "peer", peerID, 1927 ) 1928 return false, nil 1929 } 1930 1931 added, err = cs.ProposalBlockParts.AddPart(part) 1932 if err != nil { 1933 if errors.Is(err, types.ErrPartSetInvalidProof) || errors.Is(err, types.ErrPartSetUnexpectedIndex) { 1934 cs.metrics.BlockGossipPartsReceived.With("matches_current", "false").Add(1) 1935 } 1936 return added, err 1937 } 1938 1939 cs.metrics.BlockGossipPartsReceived.With("matches_current", "true").Add(1) 1940 1941 if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { 1942 return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", 1943 cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, 1944 ) 1945 } 1946 if added && cs.ProposalBlockParts.IsComplete() { 1947 bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader()) 1948 if err != nil { 1949 return added, err 1950 } 1951 1952 pbb := new(cmtproto.Block) 1953 err = proto.Unmarshal(bz, pbb) 1954 if err != nil { 1955 return added, err 1956 } 1957 1958 block, err := types.BlockFromProto(pbb) 1959 if err != nil { 1960 return added, err 1961 } 1962 1963 cs.ProposalBlock = block 1964 1965 // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal 1966 cs.Logger.Info("received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) 1967 1968 if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { 1969 cs.Logger.Error("failed publishing event complete proposal", "err", 
err) 1970 } 1971 } 1972 return added, nil 1973 } 1974 1975 func (cs *State) handleCompleteProposal(blockHeight int64) { 1976 // Update Valid* if we can. 1977 prevotes := cs.Votes.Prevotes(cs.Round) 1978 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 1979 if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { 1980 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1981 cs.Logger.Debug( 1982 "updating valid block to new proposal block", 1983 "valid_round", cs.Round, 1984 "valid_block_hash", log.NewLazyBlockHash(cs.ProposalBlock), 1985 ) 1986 1987 cs.ValidRound = cs.Round 1988 cs.ValidBlock = cs.ProposalBlock 1989 cs.ValidBlockParts = cs.ProposalBlockParts 1990 } 1991 // TODO: In case there is +2/3 majority in Prevotes set for some 1992 // block and cs.ProposalBlock contains different block, either 1993 // proposer is faulty or voting power of faulty processes is more 1994 // than 1/3. We should trigger in the future accountability 1995 // procedure at this point. 1996 } 1997 1998 if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { 1999 // Move onto the next step 2000 cs.enterPrevote(blockHeight, cs.Round) 2001 if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added 2002 cs.enterPrecommit(blockHeight, cs.Round) 2003 } 2004 } else if cs.Step == cstypes.RoundStepCommit { 2005 // If we're waiting on the proposal block... 2006 cs.tryFinalizeCommit(blockHeight) 2007 } 2008 } 2009 2010 // Attempt to add the vote. if its a duplicate signature, dupeout the validator 2011 func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { 2012 added, err := cs.addVote(vote, peerID) 2013 if err != nil { 2014 // If the vote height is off, we'll just ignore it, 2015 // But if it's a conflicting sig, add it to the cs.evpool. 2016 // If it's otherwise invalid, punish peer. 
2017 //nolint: gocritic 2018 if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { 2019 if cs.privValidatorPubKey == nil { 2020 return false, errPubKeyIsNotSet 2021 } 2022 2023 if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { 2024 cs.Logger.Error( 2025 "found conflicting vote from ourselves; did you unsafe_reset a validator?", 2026 "height", vote.Height, 2027 "round", vote.Round, 2028 "type", vote.Type, 2029 ) 2030 2031 return added, err 2032 } 2033 2034 // report conflicting votes to the evidence pool 2035 cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) 2036 cs.Logger.Debug( 2037 "found and sent conflicting votes to the evidence pool", 2038 "vote_a", voteErr.VoteA, 2039 "vote_b", voteErr.VoteB, 2040 ) 2041 2042 return added, err 2043 } else if errors.Is(err, types.ErrVoteNonDeterministicSignature) { 2044 cs.Logger.Debug("vote has non-deterministic signature", "err", err) 2045 } else { 2046 // Either 2047 // 1) bad peer OR 2048 // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR 2049 // 3) tmkms use with multiple validators connecting to a single tmkms instance 2050 // (https://github.com/tendermint/tendermint/issues/3839). 2051 cs.Logger.Info("failed attempting to add vote", "err", err) 2052 return added, ErrAddingVote 2053 } 2054 } 2055 2056 return added, nil 2057 } 2058 2059 func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 2060 cs.Logger.Debug( 2061 "adding vote", 2062 "vote_height", vote.Height, 2063 "vote_type", vote.Type, 2064 "val_index", vote.ValidatorIndex, 2065 "cs_height", cs.Height, 2066 ) 2067 2068 if vote.Height < cs.Height || (vote.Height == cs.Height && vote.Round < cs.Round) { 2069 cs.metrics.MarkLateVote(vote.Type) 2070 } 2071 2072 // A precommit for the previous height? 
2073 // These come in while we wait timeoutCommit 2074 if vote.Height+1 == cs.Height && vote.Type == cmtproto.PrecommitType { 2075 if cs.Step != cstypes.RoundStepNewHeight { 2076 // Late precommit at prior height is ignored 2077 cs.Logger.Debug("precommit vote came in after commit timeout and has been ignored", "vote", vote) 2078 return 2079 } 2080 2081 added, err = cs.LastCommit.AddVote(vote) 2082 if !added { 2083 return 2084 } 2085 2086 cs.Logger.Debug("added vote to last precommits", "last_commit", cs.LastCommit.StringShort()) 2087 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2088 return added, err 2089 } 2090 2091 cs.evsw.FireEvent(types.EventVote, vote) 2092 2093 // if we can skip timeoutCommit and have all the votes now, 2094 if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { 2095 // go straight to new round (skip timeout commit) 2096 // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) 2097 cs.enterNewRound(cs.Height, 0) 2098 } 2099 2100 return 2101 } 2102 2103 // Height mismatch is ignored. 2104 // Not necessarily a bad peer, but not favorable behavior. 
2105 if vote.Height != cs.Height { 2106 cs.Logger.Debug("vote ignored and not added", "vote_height", vote.Height, "cs_height", cs.Height, "peer", peerID) 2107 return 2108 } 2109 2110 height := cs.Height 2111 added, err = cs.Votes.AddVote(vote, peerID) 2112 if !added { 2113 // Either duplicate, or error upon cs.Votes.AddByIndex() 2114 return 2115 } 2116 if vote.Round == cs.Round { 2117 vals := cs.state.Validators 2118 _, val := vals.GetByIndex(vote.ValidatorIndex) 2119 cs.metrics.MarkVoteReceived(vote.Type, val.VotingPower, vals.TotalVotingPower()) 2120 } 2121 2122 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2123 return added, err 2124 } 2125 cs.evsw.FireEvent(types.EventVote, vote) 2126 2127 switch vote.Type { 2128 case cmtproto.PrevoteType: 2129 prevotes := cs.Votes.Prevotes(vote.Round) 2130 cs.Logger.Debug("added vote to prevote", "vote", vote, "prevotes", prevotes.StringShort()) 2131 2132 // If +2/3 prevotes for a block or nil for *any* round: 2133 if blockID, ok := prevotes.TwoThirdsMajority(); ok { 2134 // There was a polka! 2135 // If we're locked but this is a recent polka, unlock. 2136 // If it matches our ProposalBlock, update the ValidBlock 2137 2138 // Unlock if `cs.LockedRound < vote.Round <= cs.Round` 2139 // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round 2140 if (cs.LockedBlock != nil) && 2141 (cs.LockedRound < vote.Round) && 2142 (vote.Round <= cs.Round) && 2143 !cs.LockedBlock.HashesTo(blockID.Hash) { 2144 2145 cs.Logger.Debug("unlocking because of POL", "locked_round", cs.LockedRound, "pol_round", vote.Round) 2146 2147 cs.LockedRound = -1 2148 cs.LockedBlock = nil 2149 cs.LockedBlockParts = nil 2150 2151 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 2152 return added, err 2153 } 2154 } 2155 2156 // Update Valid* if we can. 2157 // NOTE: our proposal block may be nil or not what received a polka.. 
2158 if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { 2159 if cs.ProposalBlock.HashesTo(blockID.Hash) { 2160 cs.Logger.Debug("updating valid block because of POL", "valid_round", cs.ValidRound, "pol_round", vote.Round) 2161 cs.ValidRound = vote.Round 2162 cs.ValidBlock = cs.ProposalBlock 2163 cs.ValidBlockParts = cs.ProposalBlockParts 2164 } else { 2165 cs.Logger.Debug( 2166 "valid block we do not know about; set ProposalBlock=nil", 2167 "proposal", log.NewLazyBlockHash(cs.ProposalBlock), 2168 "block_id", blockID.Hash, 2169 ) 2170 2171 // we're getting the wrong block 2172 cs.ProposalBlock = nil 2173 } 2174 2175 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 2176 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 2177 } 2178 2179 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 2180 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 2181 return added, err 2182 } 2183 } 2184 } 2185 2186 // If +2/3 prevotes for *anything* for future round: 2187 switch { 2188 case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): 2189 // Round-skip if there is any 2/3+ of votes ahead of us 2190 cs.enterNewRound(height, vote.Round) 2191 2192 case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round 2193 blockID, ok := prevotes.TwoThirdsMajority() 2194 if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { 2195 cs.enterPrecommit(height, vote.Round) 2196 } else if prevotes.HasTwoThirdsAny() { 2197 cs.enterPrevoteWait(height, vote.Round) 2198 } 2199 2200 case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: 2201 // If the proposal is now complete, enter prevote of cs.Round. 
2202 if cs.isProposalComplete() { 2203 cs.enterPrevote(height, cs.Round) 2204 } 2205 } 2206 2207 case cmtproto.PrecommitType: 2208 precommits := cs.Votes.Precommits(vote.Round) 2209 cs.Logger.Debug("added vote to precommit", 2210 "height", vote.Height, 2211 "round", vote.Round, 2212 "validator", vote.ValidatorAddress.String(), 2213 "vote_timestamp", vote.Timestamp, 2214 "data", precommits.LogString()) 2215 2216 blockID, ok := precommits.TwoThirdsMajority() 2217 if ok { 2218 // Executed as TwoThirdsMajority could be from a higher round 2219 cs.enterNewRound(height, vote.Round) 2220 cs.enterPrecommit(height, vote.Round) 2221 2222 if len(blockID.Hash) != 0 { 2223 cs.enterCommit(height, vote.Round) 2224 if cs.config.SkipTimeoutCommit && precommits.HasAll() { 2225 cs.enterNewRound(cs.Height, 0) 2226 } 2227 } else { 2228 cs.enterPrecommitWait(height, vote.Round) 2229 } 2230 } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { 2231 cs.enterNewRound(height, vote.Round) 2232 cs.enterPrecommitWait(height, vote.Round) 2233 } 2234 2235 default: 2236 panic(fmt.Sprintf("unexpected vote type %v", vote.Type)) 2237 } 2238 2239 return added, err 2240 } 2241 2242 // CONTRACT: cs.privValidator is not nil. 2243 func (cs *State) signVote( 2244 msgType cmtproto.SignedMsgType, 2245 hash []byte, 2246 header types.PartSetHeader, 2247 ) (*types.Vote, error) { 2248 // Flush the WAL. Otherwise, we may not recompute the same vote to sign, 2249 // and the privValidator will refuse to sign anything. 
2250 if err := cs.wal.FlushAndSync(); err != nil { 2251 return nil, err 2252 } 2253 2254 if cs.privValidatorPubKey == nil { 2255 return nil, errPubKeyIsNotSet 2256 } 2257 2258 addr := cs.privValidatorPubKey.Address() 2259 valIdx, _ := cs.Validators.GetByAddress(addr) 2260 2261 vote := &types.Vote{ 2262 ValidatorAddress: addr, 2263 ValidatorIndex: valIdx, 2264 Height: cs.Height, 2265 Round: cs.Round, 2266 Timestamp: cs.voteTime(), 2267 Type: msgType, 2268 BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, 2269 } 2270 2271 v := vote.ToProto() 2272 err := cs.privValidator.SignVote(cs.state.ChainID, v) 2273 vote.Signature = v.Signature 2274 vote.Timestamp = v.Timestamp 2275 2276 return vote, err 2277 } 2278 2279 func (cs *State) voteTime() time.Time { 2280 now := cmttime.Now() 2281 minVoteTime := now 2282 // Minimum time increment between blocks 2283 const timeIota = time.Millisecond 2284 // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, 2285 // even if cs.LockedBlock != nil. See https://github.com/cometbft/cometbft/tree/v0.37.x/spec/. 2286 if cs.LockedBlock != nil { 2287 // See the BFT time spec 2288 // https://github.com/cometbft/cometbft/blob/v0.37.x/spec/consensus/bft-time.md 2289 minVoteTime = cs.LockedBlock.Time.Add(timeIota) 2290 } else if cs.ProposalBlock != nil { 2291 minVoteTime = cs.ProposalBlock.Time.Add(timeIota) 2292 } 2293 2294 if now.After(minVoteTime) { 2295 return now 2296 } 2297 return minVoteTime 2298 } 2299 2300 // sign the vote and publish on internalMsgQueue 2301 func (cs *State) signAddVote(msgType cmtproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { 2302 if cs.privValidator == nil { // the node does not have a key 2303 return nil 2304 } 2305 2306 if cs.privValidatorPubKey == nil { 2307 // Vote won't be signed, but it's not critical. 
2308 cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) 2309 return nil 2310 } 2311 2312 // If the node not in the validator set, do nothing. 2313 if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { 2314 return nil 2315 } 2316 2317 // TODO: pass pubKey to signVote 2318 vote, err := cs.signVote(msgType, hash, header) 2319 if err == nil { 2320 cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""}) 2321 cs.Logger.Debug("signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) 2322 return vote 2323 } 2324 2325 cs.Logger.Error("failed signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) 2326 return nil 2327 } 2328 2329 // updatePrivValidatorPubKey get's the private validator public key and 2330 // memoizes it. This func returns an error if the private validator is not 2331 // responding or responds with an error. 2332 func (cs *State) updatePrivValidatorPubKey() error { 2333 if cs.privValidator == nil { 2334 return nil 2335 } 2336 2337 pubKey, err := cs.privValidator.GetPubKey() 2338 if err != nil { 2339 return err 2340 } 2341 cs.privValidatorPubKey = pubKey 2342 return nil 2343 } 2344 2345 // look back to check existence of the node's consensus votes before joining consensus 2346 func (cs *State) checkDoubleSigningRisk(height int64) error { 2347 if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { 2348 valAddr := cs.privValidatorPubKey.Address() 2349 doubleSignCheckHeight := cs.config.DoubleSignCheckHeight 2350 if doubleSignCheckHeight > height { 2351 doubleSignCheckHeight = height 2352 } 2353 2354 for i := int64(1); i < doubleSignCheckHeight; i++ { 2355 lastCommit := cs.blockStore.LoadSeenCommit(height - i) 2356 if lastCommit != nil { 2357 for sigIdx, s := range lastCommit.Signatures { 2358 if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { 2359 cs.Logger.Info("found signature from the same 
key", "sig", s, "idx", sigIdx, "height", height-i) 2360 return ErrSignatureFoundInPastBlocks 2361 } 2362 } 2363 } 2364 } 2365 } 2366 2367 return nil 2368 } 2369 2370 func (cs *State) calculatePrevoteMessageDelayMetrics() { 2371 if cs.Proposal == nil { 2372 return 2373 } 2374 2375 ps := cs.Votes.Prevotes(cs.Round) 2376 pl := ps.List() 2377 2378 sort.Slice(pl, func(i, j int) bool { 2379 return pl[i].Timestamp.Before(pl[j].Timestamp) 2380 }) 2381 2382 var votingPowerSeen int64 2383 for _, v := range pl { 2384 _, val := cs.Validators.GetByAddress(v.ValidatorAddress) 2385 votingPowerSeen += val.VotingPower 2386 if votingPowerSeen >= cs.Validators.TotalVotingPower()*2/3+1 { 2387 cs.metrics.QuorumPrevoteDelay.With("proposer_address", cs.Validators.GetProposer().Address.String()).Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) 2388 break 2389 } 2390 } 2391 if ps.HasAll() { 2392 cs.metrics.FullPrevoteDelay.With("proposer_address", cs.Validators.GetProposer().Address.String()).Set(pl[len(pl)-1].Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) 2393 } 2394 } 2395 2396 //--------------------------------------------------------- 2397 2398 func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 2399 if h1 < h2 { 2400 return -1 2401 } else if h1 > h2 { 2402 return 1 2403 } 2404 if r1 < r2 { 2405 return -1 2406 } else if r1 > r2 { 2407 return 1 2408 } 2409 if s1 < s2 { 2410 return -1 2411 } else if s1 > s2 { 2412 return 1 2413 } 2414 return 0 2415 } 2416 2417 // repairWalFile decodes messages from src (until the decoder errors) and 2418 // writes them to dst. 
2419 func repairWalFile(src, dst string) error { 2420 in, err := os.Open(src) 2421 if err != nil { 2422 return err 2423 } 2424 defer in.Close() 2425 2426 out, err := os.Create(dst) 2427 if err != nil { 2428 return err 2429 } 2430 defer out.Close() 2431 2432 var ( 2433 dec = NewWALDecoder(in) 2434 enc = NewWALEncoder(out) 2435 ) 2436 2437 // best-case repair (until first error is encountered) 2438 for { 2439 msg, err := dec.Decode() 2440 if err != nil { 2441 break 2442 } 2443 2444 err = enc.Encode(msg) 2445 if err != nil { 2446 return fmt.Errorf("failed to encode msg: %w", err) 2447 } 2448 } 2449 2450 return nil 2451 }