// Scraped from: github.com/vipernet-xyz/tm@v0.34.24/consensus/state.go

package consensus

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"runtime/debug"
	"sort"
	"time"

	"github.com/gogo/protobuf/proto"

	cfg "github.com/vipernet-xyz/tm/config"
	cstypes "github.com/vipernet-xyz/tm/consensus/types"
	"github.com/vipernet-xyz/tm/crypto"
	tmevents "github.com/vipernet-xyz/tm/libs/events"
	"github.com/vipernet-xyz/tm/libs/fail"
	tmjson "github.com/vipernet-xyz/tm/libs/json"
	"github.com/vipernet-xyz/tm/libs/log"
	tmmath "github.com/vipernet-xyz/tm/libs/math"
	tmos "github.com/vipernet-xyz/tm/libs/os"
	"github.com/vipernet-xyz/tm/libs/service"
	tmsync "github.com/vipernet-xyz/tm/libs/sync"
	"github.com/vipernet-xyz/tm/p2p"
	tmproto "github.com/vipernet-xyz/tm/proto/tendermint/types"
	sm "github.com/vipernet-xyz/tm/state"
	"github.com/vipernet-xyz/tm/types"
	tmtime "github.com/vipernet-xyz/tm/types/time"
)

// Consensus sentinel errors
var (
	ErrInvalidProposalSignature   = errors.New("error invalid proposal signature")
	ErrInvalidProposalPOLRound    = errors.New("error invalid proposal POL round")
	ErrAddingVote                 = errors.New("error adding vote")
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	// errPubKeyIsNotSet is logged when a step needs the memoized private
	// validator public key but it has not been populated.
	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)

// msgQueueSize is the buffer capacity given to the peer, internal and stats
// message queues when a State is constructed.
var msgQueueSize = 1000

// msgs from the reactor which may update the state
type msgInfo struct {
	Msg    Message `json:"msg"`
	PeerID p2p.ID  `json:"peer_key"` // empty for internally generated messages
}

// internally generated messages which may update the state
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}

// String renders the timeout as "<duration> ; <height>/<round> <step>".
func (ti *timeoutInfo) String() string {
	return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
}

// interface to the mempool
type txNotifier interface {
	TxsAvailable() <-chan struct{}
}

// interface to the evidence pool
type evidencePool interface {
	// reports conflicting votes to the evidence pool to be processed into evidence
	ReportConflictingVotes(voteA, voteB *types.Vote)
}

// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state
	mtx tmsync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)
	doPrevote      func(height int64, round int32)
	setProposal    func(proposal *types.Proposal) error

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *Metrics
}

// StateOption sets an optional parameter on the State.
type StateOption func(*State)

// NewState returns a new State.
149 func NewState( 150 config *cfg.ConsensusConfig, 151 state sm.State, 152 blockExec *sm.BlockExecutor, 153 blockStore sm.BlockStore, 154 txNotifier txNotifier, 155 evpool evidencePool, 156 options ...StateOption, 157 ) *State { 158 cs := &State{ 159 config: config, 160 blockExec: blockExec, 161 blockStore: blockStore, 162 txNotifier: txNotifier, 163 peerMsgQueue: make(chan msgInfo, msgQueueSize), 164 internalMsgQueue: make(chan msgInfo, msgQueueSize), 165 timeoutTicker: NewTimeoutTicker(), 166 statsMsgQueue: make(chan msgInfo, msgQueueSize), 167 done: make(chan struct{}), 168 doWALCatchup: true, 169 wal: nilWAL{}, 170 evpool: evpool, 171 evsw: tmevents.NewEventSwitch(), 172 metrics: NopMetrics(), 173 } 174 175 // set function defaults (may be overwritten before calling Start) 176 cs.decideProposal = cs.defaultDecideProposal 177 cs.doPrevote = cs.defaultDoPrevote 178 cs.setProposal = cs.defaultSetProposal 179 180 // We have no votes, so reconstruct LastCommit from SeenCommit. 181 if state.LastBlockHeight > 0 { 182 cs.reconstructLastCommit(state) 183 } 184 185 cs.updateToState(state) 186 187 // NOTE: we do not call scheduleRound0 yet, we do that upon Start() 188 189 cs.BaseService = *service.NewBaseService(nil, "State", cs) 190 for _, option := range options { 191 option(cs) 192 } 193 194 return cs 195 } 196 197 // SetLogger implements Service. 198 func (cs *State) SetLogger(l log.Logger) { 199 cs.BaseService.Logger = l 200 cs.timeoutTicker.SetLogger(l) 201 } 202 203 // SetEventBus sets event bus. 204 func (cs *State) SetEventBus(b *types.EventBus) { 205 cs.eventBus = b 206 cs.blockExec.SetEventBus(b) 207 } 208 209 // StateMetrics sets the metrics. 210 func StateMetrics(metrics *Metrics) StateOption { 211 return func(cs *State) { cs.metrics = metrics } 212 } 213 214 // String returns a string. 215 func (cs *State) String() string { 216 // better not to access shared variables 217 return "ConsensusState" 218 } 219 220 // GetState returns a copy of the chain state. 
221 func (cs *State) GetState() sm.State { 222 cs.mtx.RLock() 223 defer cs.mtx.RUnlock() 224 return cs.state.Copy() 225 } 226 227 // GetLastHeight returns the last height committed. 228 // If there were no blocks, returns 0. 229 func (cs *State) GetLastHeight() int64 { 230 cs.mtx.RLock() 231 defer cs.mtx.RUnlock() 232 return cs.RoundState.Height - 1 233 } 234 235 // GetRoundState returns a shallow copy of the internal consensus state. 236 func (cs *State) GetRoundState() *cstypes.RoundState { 237 cs.mtx.RLock() 238 rs := cs.RoundState // copy 239 cs.mtx.RUnlock() 240 return &rs 241 } 242 243 // GetRoundStateJSON returns a json of RoundState. 244 func (cs *State) GetRoundStateJSON() ([]byte, error) { 245 cs.mtx.RLock() 246 defer cs.mtx.RUnlock() 247 return tmjson.Marshal(cs.RoundState) 248 } 249 250 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 251 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 252 cs.mtx.RLock() 253 defer cs.mtx.RUnlock() 254 return tmjson.Marshal(cs.RoundState.RoundStateSimple()) 255 } 256 257 // GetValidators returns a copy of the current validators. 258 func (cs *State) GetValidators() (int64, []*types.Validator) { 259 cs.mtx.RLock() 260 defer cs.mtx.RUnlock() 261 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 262 } 263 264 // SetPrivValidator sets the private validator account for signing votes. It 265 // immediately requests pubkey and caches it. 266 func (cs *State) SetPrivValidator(priv types.PrivValidator) { 267 cs.mtx.Lock() 268 defer cs.mtx.Unlock() 269 270 cs.privValidator = priv 271 272 if err := cs.updatePrivValidatorPubKey(); err != nil { 273 cs.Logger.Error("failed to get private validator pubkey", "err", err) 274 } 275 } 276 277 // SetTimeoutTicker sets the local timer. It may be useful to overwrite for 278 // testing. 
279 func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { 280 cs.mtx.Lock() 281 cs.timeoutTicker = timeoutTicker 282 cs.mtx.Unlock() 283 } 284 285 // LoadCommit loads the commit for a given height. 286 func (cs *State) LoadCommit(height int64) *types.Commit { 287 cs.mtx.RLock() 288 defer cs.mtx.RUnlock() 289 290 if height == cs.blockStore.Height() { 291 return cs.blockStore.LoadSeenCommit(height) 292 } 293 294 return cs.blockStore.LoadBlockCommit(height) 295 } 296 297 // OnStart loads the latest state via the WAL, and starts the timeout and 298 // receive routines. 299 func (cs *State) OnStart() error { 300 // We may set the WAL in testing before calling Start, so only OpenWAL if its 301 // still the nilWAL. 302 if _, ok := cs.wal.(nilWAL); ok { 303 if err := cs.loadWalFile(); err != nil { 304 return err 305 } 306 } 307 308 // we need the timeoutRoutine for replay so 309 // we don't block on the tick chan. 310 // NOTE: we will get a build up of garbage go routines 311 // firing on the tockChan until the receiveRoutine is started 312 // to deal with them (by that point, at most one will be valid) 313 if err := cs.timeoutTicker.Start(); err != nil { 314 return err 315 } 316 317 // We may have lost some votes if the process crashed reload from consensus 318 // log to catchup. 
319 if cs.doWALCatchup { 320 repairAttempted := false 321 322 LOOP: 323 for { 324 err := cs.catchupReplay(cs.Height) 325 switch { 326 case err == nil: 327 break LOOP 328 329 case !IsDataCorruptionError(err): 330 cs.Logger.Error("error on catchup replay; proceeding to start state anyway", "err", err) 331 break LOOP 332 333 case repairAttempted: 334 return err 335 } 336 337 cs.Logger.Error("the WAL file is corrupted; attempting repair", "err", err) 338 339 // 1) prep work 340 if err := cs.wal.Stop(); err != nil { 341 return err 342 } 343 344 repairAttempted = true 345 346 // 2) backup original WAL file 347 corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) 348 if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { 349 return err 350 } 351 352 cs.Logger.Debug("backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) 353 354 // 3) try to repair (WAL file will be overwritten!) 355 if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { 356 cs.Logger.Error("the WAL repair failed", "err", err) 357 return err 358 } 359 360 cs.Logger.Info("successful WAL repair") 361 362 // reload WAL file 363 if err := cs.loadWalFile(); err != nil { 364 return err 365 } 366 } 367 } 368 369 if err := cs.evsw.Start(); err != nil { 370 return err 371 } 372 373 // Double Signing Risk Reduction 374 if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { 375 return err 376 } 377 378 // now start the receiveRoutine 379 go cs.receiveRoutine(0) 380 381 // schedule the first round! 
382 // use GetRoundState so we don't race the receiveRoutine for access 383 cs.scheduleRound0(cs.GetRoundState()) 384 385 return nil 386 } 387 388 // timeoutRoutine: receive requests for timeouts on tickChan and fire timeouts on tockChan 389 // receiveRoutine: serializes processing of proposoals, block parts, votes; coordinates state transitions 390 func (cs *State) startRoutines(maxSteps int) { 391 err := cs.timeoutTicker.Start() 392 if err != nil { 393 cs.Logger.Error("failed to start timeout ticker", "err", err) 394 return 395 } 396 397 go cs.receiveRoutine(maxSteps) 398 } 399 400 // loadWalFile loads WAL data from file. It overwrites cs.wal. 401 func (cs *State) loadWalFile() error { 402 wal, err := cs.OpenWAL(cs.config.WalFile()) 403 if err != nil { 404 cs.Logger.Error("failed to load state WAL", "err", err) 405 return err 406 } 407 408 cs.wal = wal 409 return nil 410 } 411 412 // OnStop implements service.Service. 413 func (cs *State) OnStop() { 414 if err := cs.evsw.Stop(); err != nil { 415 cs.Logger.Error("failed trying to stop eventSwitch", "error", err) 416 } 417 418 if err := cs.timeoutTicker.Stop(); err != nil { 419 cs.Logger.Error("failed trying to stop timeoutTicket", "error", err) 420 } 421 // WAL is stopped in receiveRoutine. 422 } 423 424 // Wait waits for the the main routine to return. 425 // NOTE: be sure to Stop() the event switch and drain 426 // any event channels or this may deadlock 427 func (cs *State) Wait() { 428 <-cs.done 429 } 430 431 // OpenWAL opens a file to log all consensus messages and timeouts for 432 // deterministic accountability. 
433 func (cs *State) OpenWAL(walFile string) (WAL, error) { 434 wal, err := NewWAL(walFile) 435 if err != nil { 436 cs.Logger.Error("failed to open WAL", "file", walFile, "err", err) 437 return nil, err 438 } 439 440 wal.SetLogger(cs.Logger.With("wal", walFile)) 441 442 if err := wal.Start(); err != nil { 443 cs.Logger.Error("failed to start WAL", "err", err) 444 return nil, err 445 } 446 447 return wal, nil 448 } 449 450 //------------------------------------------------------------ 451 // Public interface for passing messages into the consensus state, possibly causing a state transition. 452 // If peerID == "", the msg is considered internal. 453 // Messages are added to the appropriate queue (peer or internal). 454 // If the queue is full, the function may block. 455 // TODO: should these return anything or let callers just use events? 456 457 // AddVote inputs a vote. 458 func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 459 if peerID == "" { 460 cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""} 461 } else { 462 cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID} 463 } 464 465 // TODO: wait for event?! 466 return false, nil 467 } 468 469 // SetProposal inputs a proposal. 470 func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { 471 if peerID == "" { 472 cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""} 473 } else { 474 cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID} 475 } 476 477 // TODO: wait for event?! 478 return nil 479 } 480 481 // AddProposalBlockPart inputs a part of the proposal block. 482 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { 483 if peerID == "" { 484 cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""} 485 } else { 486 cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID} 487 } 488 489 // TODO: wait for event?! 
490 return nil 491 } 492 493 // SetProposalAndBlock inputs the proposal and all block parts. 494 func (cs *State) SetProposalAndBlock( 495 proposal *types.Proposal, 496 block *types.Block, 497 parts *types.PartSet, 498 peerID p2p.ID, 499 ) error { 500 if err := cs.SetProposal(proposal, peerID); err != nil { 501 return err 502 } 503 504 for i := 0; i < int(parts.Total()); i++ { 505 part := parts.GetPart(i) 506 if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { 507 return err 508 } 509 } 510 511 return nil 512 } 513 514 //------------------------------------------------------------ 515 // internal functions for managing the state 516 517 func (cs *State) updateHeight(height int64) { 518 cs.metrics.Height.Set(float64(height)) 519 cs.Height = height 520 } 521 522 func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { 523 cs.Round = round 524 cs.Step = step 525 } 526 527 // enterNewRound(height, 0) at cs.StartTime. 528 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 529 // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) 530 sleepDuration := rs.StartTime.Sub(tmtime.Now()) 531 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 532 } 533 534 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 535 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { 536 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) 537 } 538 539 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 540 func (cs *State) sendInternalMessage(mi msgInfo) { 541 select { 542 case cs.internalMsgQueue <- mi: 543 default: 544 // NOTE: using the go-routine means our votes can 545 // be processed out of order. 
546 // TODO: use CList here for strict determinism and 547 // attempt push to internalMsgQueue in receiveRoutine 548 cs.Logger.Debug("internal msg queue is full; using a go-routine") 549 go func() { cs.internalMsgQueue <- mi }() 550 } 551 } 552 553 // Reconstruct LastCommit from SeenCommit, which we saved along with the block, 554 // (which happens even before saving the state) 555 func (cs *State) reconstructLastCommit(state sm.State) { 556 seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) 557 if seenCommit == nil { 558 panic(fmt.Sprintf( 559 "failed to reconstruct last commit; seen commit for height %v not found", 560 state.LastBlockHeight, 561 )) 562 } 563 564 lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) 565 if !lastPrecommits.HasTwoThirdsMajority() { 566 panic("failed to reconstruct last commit; does not have +2/3 maj") 567 } 568 569 cs.LastCommit = lastPrecommits 570 } 571 572 // Updates State and increments height to match that of state. 573 // The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. 574 func (cs *State) updateToState(state sm.State) { 575 if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight { 576 panic(fmt.Sprintf( 577 "updateToState() expected state height of %v but found %v", 578 cs.Height, state.LastBlockHeight, 579 )) 580 } 581 582 if !cs.state.IsEmpty() { 583 if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height { 584 // This might happen when someone else is mutating cs.state. 585 // Someone forgot to pass in state.Copy() somewhere?! 
586 panic(fmt.Sprintf( 587 "inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v", 588 cs.state.LastBlockHeight+1, cs.Height, 589 )) 590 } 591 if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight { 592 panic(fmt.Sprintf( 593 "inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v", 594 cs.state.LastBlockHeight, cs.state.InitialHeight, 595 )) 596 } 597 598 // If state isn't further out than cs.state, just ignore. 599 // This happens when SwitchToConsensus() is called in the reactor. 600 // We don't want to reset e.g. the Votes, but we still want to 601 // signal the new round step, because other services (eg. txNotifier) 602 // depend on having an up-to-date peer state! 603 if state.LastBlockHeight <= cs.state.LastBlockHeight { 604 cs.Logger.Debug( 605 "ignoring updateToState()", 606 "new_height", state.LastBlockHeight+1, 607 "old_height", cs.state.LastBlockHeight+1, 608 ) 609 cs.newStep() 610 return 611 } 612 } 613 614 // Reset fields based on state. 615 validators := state.Validators 616 617 switch { 618 case state.LastBlockHeight == 0: // Very first commit should be empty. 619 cs.LastCommit = (*types.VoteSet)(nil) 620 case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes 621 if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() { 622 panic(fmt.Sprintf( 623 "wanted to form a commit, but precommits (H/R: %d/%d) didn't have 2/3+: %v", 624 state.LastBlockHeight, cs.CommitRound, cs.Votes.Precommits(cs.CommitRound), 625 )) 626 } 627 628 cs.LastCommit = cs.Votes.Precommits(cs.CommitRound) 629 630 case cs.LastCommit == nil: 631 // NOTE: when Tendermint starts, it has no votes. reconstructLastCommit 632 // must be called to reconstruct LastCommit from SeenCommit. 
633 panic(fmt.Sprintf( 634 "last commit cannot be empty after initial block (H:%d)", 635 state.LastBlockHeight+1, 636 )) 637 } 638 639 // Next desired block height 640 height := state.LastBlockHeight + 1 641 if height == 1 { 642 height = state.InitialHeight 643 } 644 645 // RoundState fields 646 cs.updateHeight(height) 647 cs.updateRoundStep(0, cstypes.RoundStepNewHeight) 648 649 if cs.CommitTime.IsZero() { 650 // "Now" makes it easier to sync up dev nodes. 651 // We add timeoutCommit to allow transactions 652 // to be gathered for the first block. 653 // And alternative solution that relies on clocks: 654 // cs.StartTime = state.LastBlockTime.Add(timeoutCommit) 655 cs.StartTime = cs.config.Commit(tmtime.Now()) 656 } else { 657 cs.StartTime = cs.config.Commit(cs.CommitTime) 658 } 659 660 cs.Validators = validators 661 cs.Proposal = nil 662 cs.ProposalBlock = nil 663 cs.ProposalBlockParts = nil 664 cs.LockedRound = -1 665 cs.LockedBlock = nil 666 cs.LockedBlockParts = nil 667 cs.ValidRound = -1 668 cs.ValidBlock = nil 669 cs.ValidBlockParts = nil 670 cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators) 671 cs.CommitRound = -1 672 cs.LastValidators = state.LastValidators 673 cs.TriggeredTimeoutPrecommit = false 674 675 cs.state = state 676 677 // Finally, broadcast RoundState 678 cs.newStep() 679 } 680 681 func (cs *State) newStep() { 682 rs := cs.RoundStateEvent() 683 if err := cs.wal.Write(rs); err != nil { 684 cs.Logger.Error("failed writing to WAL", "err", err) 685 } 686 687 cs.nSteps++ 688 689 // newStep is called by updateToState in NewState before the eventBus is set! 
690 if cs.eventBus != nil { 691 if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil { 692 cs.Logger.Error("failed publishing new round step", "err", err) 693 } 694 695 cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState) 696 } 697 } 698 699 //----------------------------------------- 700 // the main go routines 701 702 // receiveRoutine handles messages which may cause state transitions. 703 // it's argument (n) is the number of messages to process before exiting - use 0 to run forever 704 // It keeps the RoundState and is the only thing that updates it. 705 // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. 706 // State must be locked before any internal state is updated. 707 func (cs *State) receiveRoutine(maxSteps int) { 708 onExit := func(cs *State) { 709 // NOTE: the internalMsgQueue may have signed messages from our 710 // priv_val that haven't hit the WAL, but its ok because 711 // priv_val tracks LastSig 712 713 // close wal now that we're done writing to it 714 if err := cs.wal.Stop(); err != nil { 715 cs.Logger.Error("failed trying to stop WAL", "error", err) 716 } 717 718 cs.wal.Wait() 719 close(cs.done) 720 } 721 722 defer func() { 723 if r := recover(); r != nil { 724 cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack())) 725 // stop gracefully 726 // 727 // NOTE: We most probably shouldn't be running any further when there is 728 // some unexpected panic. Some unknown error happened, and so we don't 729 // know if that will result in the validator signing an invalid thing. It 730 // might be worthwhile to explore a mechanism for manual resuming via 731 // some console or secure RPC system, but for now, halting the chain upon 732 // unexpected consensus bugs sounds like the better option. 
733 onExit(cs) 734 } 735 }() 736 737 for { 738 if maxSteps > 0 { 739 if cs.nSteps >= maxSteps { 740 cs.Logger.Debug("reached max steps; exiting receive routine") 741 cs.nSteps = 0 742 return 743 } 744 } 745 746 rs := cs.RoundState 747 var mi msgInfo 748 749 select { 750 case <-cs.txNotifier.TxsAvailable(): 751 cs.handleTxsAvailable() 752 753 case mi = <-cs.peerMsgQueue: 754 if err := cs.wal.Write(mi); err != nil { 755 cs.Logger.Error("failed writing to WAL", "err", err) 756 } 757 758 // handles proposals, block parts, votes 759 // may generate internal events (votes, complete proposals, 2/3 majorities) 760 cs.handleMsg(mi) 761 762 case mi = <-cs.internalMsgQueue: 763 err := cs.wal.WriteSync(mi) // NOTE: fsync 764 if err != nil { 765 panic(fmt.Sprintf( 766 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 767 mi, err, 768 )) 769 } 770 771 if _, ok := mi.Msg.(*VoteMessage); ok { 772 // we actually want to simulate failing during 773 // the previous WriteSync, but this isn't easy to do. 774 // Equivalent would be to fail here and manually remove 775 // some bytes from the end of the wal. 776 fail.Fail() // XXX 777 } 778 779 // handles proposals, block parts, votes 780 cs.handleMsg(mi) 781 782 case ti := <-cs.timeoutTicker.Chan(): // tockChan: 783 if err := cs.wal.Write(ti); err != nil { 784 cs.Logger.Error("failed writing to WAL", "err", err) 785 } 786 787 // if the timeout is relevant to the rs 788 // go to the next step 789 cs.handleTimeout(ti, rs) 790 791 case <-cs.Quit(): 792 onExit(cs) 793 return 794 } 795 } 796 } 797 798 // state transitions on complete-proposal, 2/3-any, 2/3-one 799 func (cs *State) handleMsg(mi msgInfo) { 800 cs.mtx.Lock() 801 defer cs.mtx.Unlock() 802 var ( 803 added bool 804 err error 805 ) 806 807 msg, peerID := mi.Msg, mi.PeerID 808 809 switch msg := msg.(type) { 810 case *ProposalMessage: 811 // will not cause transition. 
812 // once proposal is set, we can receive block parts 813 err = cs.setProposal(msg.Proposal) 814 815 case *BlockPartMessage: 816 // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit 817 added, err = cs.addProposalBlockPart(msg, peerID) 818 819 // We unlock here to yield to any routines that need to read the the RoundState. 820 // Previously, this code held the lock from the point at which the final block 821 // part was received until the block executed against the application. 822 // This prevented the reactor from being able to retrieve the most updated 823 // version of the RoundState. The reactor needs the updated RoundState to 824 // gossip the now completed block. 825 // 826 // This code can be further improved by either always operating on a copy 827 // of RoundState and only locking when switching out State's copy of 828 // RoundState with the updated copy or by emitting RoundState events in 829 // more places for routines depending on it to listen for. 830 cs.mtx.Unlock() 831 832 cs.mtx.Lock() 833 if added && cs.ProposalBlockParts.IsComplete() { 834 cs.handleCompleteProposal(msg.Height) 835 } 836 if added { 837 cs.statsMsgQueue <- mi 838 } 839 840 if err != nil && msg.Round != cs.Round { 841 cs.Logger.Debug( 842 "received block part from wrong round", 843 "height", cs.Height, 844 "cs_round", cs.Round, 845 "block_round", msg.Round, 846 ) 847 err = nil 848 } 849 850 case *VoteMessage: 851 // attempt to add the vote and dupeout the validator if its a duplicate signature 852 // if the vote gives us a 2/3-any or 2/3-one, we transition 853 added, err = cs.tryAddVote(msg.Vote, peerID) 854 if added { 855 cs.statsMsgQueue <- mi 856 } 857 858 // if err == ErrAddingVote { 859 // TODO: punish peer 860 // We probably don't want to stop the peer here. The vote does not 861 // necessarily comes from a malicious peer but can be just broadcasted by 862 // a typical peer. 
863 // https://github.com/vipernet-xyz/tm/issues/1281 864 // } 865 866 // NOTE: the vote is broadcast to peers by the reactor listening 867 // for vote events 868 869 // TODO: If rs.Height == vote.Height && rs.Round < vote.Round, 870 // the peer is sending us CatchupCommit precommits. 871 // We could make note of this and help filter in broadcastHasVoteMessage(). 872 873 default: 874 cs.Logger.Error("unknown msg type", "type", fmt.Sprintf("%T", msg)) 875 return 876 } 877 878 if err != nil { 879 cs.Logger.Error( 880 "failed to process message", 881 "height", cs.Height, 882 "round", cs.Round, 883 "peer", peerID, 884 "msg_type", fmt.Sprintf("%T", msg), 885 "err", err, 886 ) 887 } 888 } 889 890 func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) { 891 cs.Logger.Debug("received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) 892 893 // timeouts must be for current height, round, step 894 if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) { 895 cs.Logger.Debug("ignoring tock because we are ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step) 896 return 897 } 898 899 // the timeout will now cause a state transition 900 cs.mtx.Lock() 901 defer cs.mtx.Unlock() 902 903 switch ti.Step { 904 case cstypes.RoundStepNewHeight: 905 // NewRound event fired from enterNewRound. 906 // XXX: should we fire timeout here (for timeout commit)? 
907 cs.enterNewRound(ti.Height, 0) 908 909 case cstypes.RoundStepNewRound: 910 cs.enterPropose(ti.Height, 0) 911 912 case cstypes.RoundStepPropose: 913 if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil { 914 cs.Logger.Error("failed publishing timeout propose", "err", err) 915 } 916 917 cs.enterPrevote(ti.Height, ti.Round) 918 919 case cstypes.RoundStepPrevoteWait: 920 if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { 921 cs.Logger.Error("failed publishing timeout wait", "err", err) 922 } 923 924 cs.enterPrecommit(ti.Height, ti.Round) 925 926 case cstypes.RoundStepPrecommitWait: 927 if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { 928 cs.Logger.Error("failed publishing timeout wait", "err", err) 929 } 930 931 cs.enterPrecommit(ti.Height, ti.Round) 932 cs.enterNewRound(ti.Height, ti.Round+1) 933 934 default: 935 panic(fmt.Sprintf("invalid timeout step: %v", ti.Step)) 936 } 937 } 938 939 func (cs *State) handleTxsAvailable() { 940 cs.mtx.Lock() 941 defer cs.mtx.Unlock() 942 943 // We only need to do this for round 0. 
944 if cs.Round != 0 { 945 return 946 } 947 948 switch cs.Step { 949 case cstypes.RoundStepNewHeight: // timeoutCommit phase 950 if cs.needProofBlock(cs.Height) { 951 // enterPropose will be called by enterNewRound 952 return 953 } 954 955 // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight 956 timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond 957 cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) 958 959 case cstypes.RoundStepNewRound: // after timeoutCommit 960 cs.enterPropose(cs.Height, 0) 961 } 962 } 963 964 //----------------------------------------------------------------------------- 965 // State functions 966 // Used internally by handleTimeout and handleMsg to make state transitions 967 968 // Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit), 969 // 970 // or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1) 971 // 972 // Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1) 973 // Enter: +2/3 precommits for nil at (height,round-1) 974 // Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round) 975 // NOTE: cs.StartTime was already set for height. 
func (cs *State) enterNewRound(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	// Reject calls for another height, an earlier round, or the current round
	// when we are already past the NewHeight step.
	if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) {
		logger.Debug(
			"entering new round with invalid args",
			"current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	if now := tmtime.Now(); cs.StartTime.After(now) {
		logger.Debug("need to set a buffer and log message here for sanity", "start_time", cs.StartTime, "now", now)
	}

	logger.Debug("entering new round", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step))

	// increment validators if necessary
	validators := cs.Validators
	if cs.Round < round {
		// Work on a copy and advance proposer priority once per skipped round.
		validators = validators.Copy()
		validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round))
	}

	// Setup new round
	// we don't fire newStep for this step,
	// but we fire an event, so update the round step first
	cs.updateRoundStep(round, cstypes.RoundStepNewRound)
	cs.Validators = validators
	if round == 0 {
		// We've already reset these upon new height,
		// and meanwhile we might have received a proposal
		// for round 0.
	} else {
		logger.Debug("resetting proposal info")
		cs.Proposal = nil
		cs.ProposalBlock = nil
		cs.ProposalBlockParts = nil
	}

	cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping
	cs.TriggeredTimeoutPrecommit = false

	if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
		cs.Logger.Error("failed publishing new round", "err", err)
	}

	cs.metrics.Rounds.Set(float64(round))

	// Wait for txs to be available in the mempool
	// before we enterPropose in round 0. If the last block changed the app hash,
	// we may need an empty "proof" block, and enterPropose immediately.
	waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height)
	if waitForTxs {
		if cs.config.CreateEmptyBlocksInterval > 0 {
			cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round,
				cstypes.RoundStepNewRound)
		}
	} else {
		cs.enterPropose(height, round)
	}
}

// needProofBlock returns true on the first height (so the genesis app hash is signed right away)
// and where the last block (height-1) caused the app hash to change
//
// Panics if the block meta for height-1 cannot be loaded from the block store.
func (cs *State) needProofBlock(height int64) bool {
	if height == cs.state.InitialHeight {
		return true
	}

	lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
	if lastBlockMeta == nil {
		panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1))
	}

	return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash)
}

// Enter (CreateEmptyBlocks): from enterNewRound(height,round)
// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ):
//
//	after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval
//
// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool
func (cs *State) enterPropose(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	// Reject calls for another height, an earlier round, or the current round
	// when we have already reached (or passed) the Propose step.
	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) {
		logger.Debug(
			"entering propose step with invalid args",
			"current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	logger.Debug("entering propose step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step))

	// Runs on every return path below, after any proposal has been decided.
	defer func() {
		// Done enterPropose:
		cs.updateRoundStep(round, cstypes.RoundStepPropose)
		cs.newStep()

		// If we have the whole proposal + POL, then goto Prevote now.
		// else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart),
		// or else after timeoutPropose
		if cs.isProposalComplete() {
			cs.enterPrevote(height, cs.Round)
		}
	}()

	// If we don't get the proposal and all block parts quick enough, enterPrevote
	cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose)

	// Nothing more to do if we're not a validator
	if cs.privValidator == nil {
		logger.Debug("node is not a validator")
		return
	}

	logger.Debug("node is a validator")

	if cs.privValidatorPubKey == nil {
		// If this node is a validator & proposer in the current round, it will
		// miss the opportunity to create a block.
		logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet)
		return
	}

	address := cs.privValidatorPubKey.Address()

	// if not a validator, we're done
	if !cs.Validators.HasAddress(address) {
		logger.Debug("node is not a validator", "addr", address, "vals", cs.Validators)
		return
	}

	if cs.isProposer(address) {
		logger.Debug("propose step; our turn to propose", "proposer", address)
		cs.decideProposal(height, round)
	} else {
		logger.Debug("propose step; not our turn to propose", "proposer", cs.Validators.GetProposer().Address)
	}
}

// isProposer reports whether address equals the address of the current
// proposer in cs.Validators.
func (cs *State) isProposer(address []byte) bool {
	return bytes.Equal(cs.Validators.GetProposer().Address, address)
}

func (cs *State) defaultDecideProposal(height int64, round int32) {
	var block *types.Block
	var blockParts *types.PartSet

	// Decide on block
	if cs.ValidBlock != nil {
		// If there is valid block, choose that.
1131 block, blockParts = cs.ValidBlock, cs.ValidBlockParts 1132 } else { 1133 // Create a new proposal block from state/txs from the mempool. 1134 block, blockParts = cs.createProposalBlock() 1135 if block == nil { 1136 return 1137 } 1138 } 1139 1140 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1141 // and the privValidator will refuse to sign anything. 1142 if err := cs.wal.FlushAndSync(); err != nil { 1143 cs.Logger.Error("failed flushing WAL to disk") 1144 } 1145 1146 // Make proposal 1147 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1148 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1149 p := proposal.ToProto() 1150 if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { 1151 proposal.Signature = p.Signature 1152 1153 // send proposal and block parts on internal msg queue 1154 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 1155 1156 for i := 0; i < int(blockParts.Total()); i++ { 1157 part := blockParts.GetPart(i) 1158 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 1159 } 1160 1161 cs.Logger.Debug("signed proposal", "height", height, "round", round, "proposal", proposal) 1162 } else if !cs.replayMode { 1163 cs.Logger.Error("propose step; failed signing proposal", "height", height, "round", round, "err", err) 1164 } 1165 } 1166 1167 // Returns true if the proposal block is complete && 1168 // (if POLRound was proposed, we have +2/3 prevotes from there). 1169 func (cs *State) isProposalComplete() bool { 1170 if cs.Proposal == nil || cs.ProposalBlock == nil { 1171 return false 1172 } 1173 // we have the proposal. 
if there's a POLRound, 1174 // make sure we have the prevotes from it too 1175 if cs.Proposal.POLRound < 0 { 1176 return true 1177 } 1178 // if this is false the proposer is lying or we haven't received the POL yet 1179 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1180 } 1181 1182 // Create the next block to propose and return it. Returns nil block upon error. 1183 // 1184 // We really only need to return the parts, but the block is returned for 1185 // convenience so we can log the proposal block. 1186 // 1187 // NOTE: keep it side-effect free for clarity. 1188 // CONTRACT: cs.privValidator is not nil. 1189 func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { 1190 if cs.privValidator == nil { 1191 panic("entered createProposalBlock with privValidator being nil") 1192 } 1193 1194 var commit *types.Commit 1195 switch { 1196 case cs.Height == cs.state.InitialHeight: 1197 // We're creating a proposal for the first block. 1198 // The commit is empty, but not nil. 1199 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1200 1201 case cs.LastCommit.HasTwoThirdsMajority(): 1202 // Make the commit from LastCommit 1203 commit = cs.LastCommit.MakeCommit() 1204 1205 default: // This shouldn't happen. 1206 cs.Logger.Error("propose step; cannot propose anything without commit for the previous block") 1207 return 1208 } 1209 1210 if cs.privValidatorPubKey == nil { 1211 // If this node is a validator & proposer in the current round, it will 1212 // miss the opportunity to create a block. 1213 cs.Logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1214 return 1215 } 1216 1217 proposerAddr := cs.privValidatorPubKey.Address() 1218 1219 return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1220 } 1221 1222 // Enter: `timeoutPropose` after entering Propose. 1223 // Enter: proposal block and POL is ready. 
1224 // Prevote for LockedBlock if we're locked, or ProposalBlock if valid. 1225 // Otherwise vote nil. 1226 func (cs *State) enterPrevote(height int64, round int32) { 1227 logger := cs.Logger.With("height", height, "round", round) 1228 1229 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 1230 logger.Debug( 1231 "entering prevote step with invalid args", 1232 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1233 ) 1234 return 1235 } 1236 1237 defer func() { 1238 // Done enterPrevote: 1239 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 1240 cs.newStep() 1241 }() 1242 1243 logger.Debug("entering prevote step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1244 1245 // Sign and broadcast vote as necessary 1246 cs.doPrevote(height, round) 1247 1248 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 1249 // (so we have more time to try and collect +2/3 prevotes for a single block) 1250 } 1251 1252 func (cs *State) defaultDoPrevote(height int64, round int32) { 1253 logger := cs.Logger.With("height", height, "round", round) 1254 1255 // If a block is locked, prevote that. 1256 if cs.LockedBlock != nil { 1257 logger.Debug("prevote step; already locked on a block; prevoting locked block") 1258 cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) 1259 return 1260 } 1261 1262 // If ProposalBlock is nil, prevote nil. 1263 if cs.ProposalBlock == nil { 1264 logger.Debug("prevote step: ProposalBlock is nil") 1265 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1266 return 1267 } 1268 1269 // Validate proposal block 1270 err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) 1271 if err != nil { 1272 // ProposalBlock is invalid, prevote nil. 
1273 logger.Error("prevote step: ProposalBlock is invalid", "err", err) 1274 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1275 return 1276 } 1277 1278 // Prevote cs.ProposalBlock 1279 // NOTE: the proposal signature is validated when it is received, 1280 // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) 1281 logger.Debug("prevote step: ProposalBlock is valid") 1282 cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) 1283 } 1284 1285 // Enter: any +2/3 prevotes at next round. 1286 func (cs *State) enterPrevoteWait(height int64, round int32) { 1287 logger := cs.Logger.With("height", height, "round", round) 1288 1289 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1290 logger.Debug( 1291 "entering prevote wait step with invalid args", 1292 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1293 ) 1294 return 1295 } 1296 1297 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1298 panic(fmt.Sprintf( 1299 "entering prevote wait step (%v/%v), but prevotes does not have any +2/3 votes", 1300 height, round, 1301 )) 1302 } 1303 1304 logger.Debug("entering prevote wait step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1305 1306 defer func() { 1307 // Done enterPrevoteWait: 1308 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1309 cs.newStep() 1310 }() 1311 1312 // Wait for some more prevotes; enterPrecommit 1313 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1314 } 1315 1316 // Enter: `timeoutPrevote` after any +2/3 prevotes. 1317 // Enter: `timeoutPrecommit` after any +2/3 precommits. 1318 // Enter: +2/3 precomits for block or nil. 
1319 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 1320 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 1321 // else, precommit nil otherwise. 1322 func (cs *State) enterPrecommit(height int64, round int32) { 1323 logger := cs.Logger.With("height", height, "round", round) 1324 1325 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { 1326 logger.Debug( 1327 "entering precommit step with invalid args", 1328 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1329 ) 1330 return 1331 } 1332 1333 logger.Debug("entering precommit step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1334 1335 defer func() { 1336 // Done enterPrecommit: 1337 cs.updateRoundStep(round, cstypes.RoundStepPrecommit) 1338 cs.newStep() 1339 }() 1340 1341 // check for a polka 1342 blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() 1343 1344 // If we don't have a polka, we must precommit nil. 1345 if !ok { 1346 if cs.LockedBlock != nil { 1347 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit while we are locked; precommitting nil") 1348 } else { 1349 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit; precommitting nil") 1350 } 1351 1352 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1353 return 1354 } 1355 1356 // At this point +2/3 prevoted for a particular block or nil. 1357 if err := cs.eventBus.PublishEventPolka(cs.RoundStateEvent()); err != nil { 1358 logger.Error("failed publishing polka", "err", err) 1359 } 1360 1361 // the latest POLRound should be this round. 1362 polRound, _ := cs.Votes.POLInfo() 1363 if polRound < round { 1364 panic(fmt.Sprintf("this POLRound should be %v but got %v", round, polRound)) 1365 } 1366 1367 // +2/3 prevoted nil. Unlock and precommit nil. 
1368 if len(blockID.Hash) == 0 { 1369 if cs.LockedBlock == nil { 1370 logger.Debug("precommit step; +2/3 prevoted for nil") 1371 } else { 1372 logger.Debug("precommit step; +2/3 prevoted for nil; unlocking") 1373 cs.LockedRound = -1 1374 cs.LockedBlock = nil 1375 cs.LockedBlockParts = nil 1376 1377 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1378 logger.Error("failed publishing event unlock", "err", err) 1379 } 1380 } 1381 1382 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1383 return 1384 } 1385 1386 // At this point, +2/3 prevoted for a particular block. 1387 1388 // If we're already locked on that block, precommit it, and update the LockedRound 1389 if cs.LockedBlock.HashesTo(blockID.Hash) { 1390 logger.Debug("precommit step; +2/3 prevoted locked block; relocking") 1391 cs.LockedRound = round 1392 1393 if err := cs.eventBus.PublishEventRelock(cs.RoundStateEvent()); err != nil { 1394 logger.Error("failed publishing event relock", "err", err) 1395 } 1396 1397 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1398 return 1399 } 1400 1401 // If +2/3 prevoted for proposal block, stage and precommit it 1402 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1403 logger.Debug("precommit step; +2/3 prevoted proposal block; locking", "hash", blockID.Hash) 1404 1405 // Validate the block. 
1406 if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil { 1407 panic(fmt.Sprintf("precommit step; +2/3 prevoted for an invalid block: %v", err)) 1408 } 1409 1410 cs.LockedRound = round 1411 cs.LockedBlock = cs.ProposalBlock 1412 cs.LockedBlockParts = cs.ProposalBlockParts 1413 1414 if err := cs.eventBus.PublishEventLock(cs.RoundStateEvent()); err != nil { 1415 logger.Error("failed publishing event lock", "err", err) 1416 } 1417 1418 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1419 return 1420 } 1421 1422 // There was a polka in this round for a block we don't have. 1423 // Fetch that block, unlock, and precommit nil. 1424 // The +2/3 prevotes for this round is the POL for our unlock. 1425 logger.Debug("precommit step; +2/3 prevotes for a block we do not have; voting nil", "block_id", blockID) 1426 1427 cs.LockedRound = -1 1428 cs.LockedBlock = nil 1429 cs.LockedBlockParts = nil 1430 1431 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1432 cs.ProposalBlock = nil 1433 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1434 } 1435 1436 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1437 logger.Error("failed publishing event unlock", "err", err) 1438 } 1439 1440 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1441 } 1442 1443 // Enter: any +2/3 precommits for next round. 
1444 func (cs *State) enterPrecommitWait(height int64, round int32) { 1445 logger := cs.Logger.With("height", height, "round", round) 1446 1447 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1448 logger.Debug( 1449 "entering precommit wait step with invalid args", 1450 "triggered_timeout", cs.TriggeredTimeoutPrecommit, 1451 "current", log.NewLazySprintf("%v/%v", cs.Height, cs.Round), 1452 ) 1453 return 1454 } 1455 1456 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1457 panic(fmt.Sprintf( 1458 "entering precommit wait step (%v/%v), but precommits does not have any +2/3 votes", 1459 height, round, 1460 )) 1461 } 1462 1463 logger.Debug("entering precommit wait step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1464 1465 defer func() { 1466 // Done enterPrecommitWait: 1467 cs.TriggeredTimeoutPrecommit = true 1468 cs.newStep() 1469 }() 1470 1471 // wait for some more precommits; enterNewRound 1472 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1473 } 1474 1475 // Enter: +2/3 precommits for block 1476 func (cs *State) enterCommit(height int64, commitRound int32) { 1477 logger := cs.Logger.With("height", height, "commit_round", commitRound) 1478 1479 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1480 logger.Debug( 1481 "entering commit step with invalid args", 1482 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1483 ) 1484 return 1485 } 1486 1487 logger.Debug("entering commit step", "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1488 1489 defer func() { 1490 // Done enterCommit: 1491 // keep cs.Round the same, commitRound points to the right Precommits set. 1492 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1493 cs.CommitRound = commitRound 1494 cs.CommitTime = tmtime.Now() 1495 cs.newStep() 1496 1497 // Maybe finalize immediately. 
1498 cs.tryFinalizeCommit(height) 1499 }() 1500 1501 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1502 if !ok { 1503 panic("RunActionCommit() expects +2/3 precommits") 1504 } 1505 1506 // The Locked* fields no longer matter. 1507 // Move them over to ProposalBlock if they match the commit hash, 1508 // otherwise they'll be cleared in updateToState. 1509 if cs.LockedBlock.HashesTo(blockID.Hash) { 1510 logger.Debug("commit is for a locked block; set ProposalBlock=LockedBlock", "block_hash", blockID.Hash) 1511 cs.ProposalBlock = cs.LockedBlock 1512 cs.ProposalBlockParts = cs.LockedBlockParts 1513 } 1514 1515 // If we don't have the block being committed, set up to get it. 1516 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1517 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1518 logger.Info( 1519 "commit is for a block we do not know about; set ProposalBlock=nil", 1520 "proposal", log.NewLazyBlockHash(cs.ProposalBlock), 1521 "commit", blockID.Hash, 1522 ) 1523 1524 // We're getting the wrong block. 1525 // Set up ProposalBlockParts and keep waiting. 1526 cs.ProposalBlock = nil 1527 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1528 1529 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1530 logger.Error("failed publishing valid block", "err", err) 1531 } 1532 1533 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 1534 } 1535 } 1536 } 1537 1538 // If we have the block AND +2/3 commits for it, finalize. 
1539 func (cs *State) tryFinalizeCommit(height int64) { 1540 logger := cs.Logger.With("height", height) 1541 1542 if cs.Height != height { 1543 panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) 1544 } 1545 1546 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1547 if !ok || len(blockID.Hash) == 0 { 1548 logger.Error("failed attempt to finalize commit; there was no +2/3 majority or +2/3 was for nil") 1549 return 1550 } 1551 1552 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1553 // TODO: this happens every time if we're not a validator (ugly logs) 1554 // TODO: ^^ wait, why does it matter that we're a validator? 1555 logger.Debug( 1556 "failed attempt to finalize commit; we do not have the commit block", 1557 "proposal_block", log.NewLazyBlockHash(cs.ProposalBlock), 1558 "commit_block", blockID.Hash, 1559 ) 1560 return 1561 } 1562 1563 cs.finalizeCommit(height) 1564 } 1565 1566 // Increment height and goto cstypes.RoundStepNewHeight 1567 func (cs *State) finalizeCommit(height int64) { 1568 logger := cs.Logger.With("height", height) 1569 1570 if cs.Height != height || cs.Step != cstypes.RoundStepCommit { 1571 logger.Debug( 1572 "entering finalize commit step", 1573 "current", log.NewLazySprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1574 ) 1575 return 1576 } 1577 1578 cs.calculatePrevoteMessageDelayMetrics() 1579 1580 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1581 block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts 1582 1583 if !ok { 1584 panic("cannot finalize commit; commit does not have 2/3 majority") 1585 } 1586 if !blockParts.HasHeader(blockID.PartSetHeader) { 1587 panic("expected ProposalBlockParts header to be commit header") 1588 } 1589 if !block.HashesTo(blockID.Hash) { 1590 panic("cannot finalize commit; proposal block does not hash to commit hash") 1591 } 1592 1593 if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { 1594 
panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) 1595 } 1596 1597 logger.Info( 1598 "finalizing commit of block", 1599 "hash", log.NewLazyBlockHash(block), 1600 "root", block.AppHash, 1601 "num_txs", len(block.Txs), 1602 ) 1603 logger.Debug("committed block", "block", log.NewLazySprintf("%v", block)) 1604 1605 fail.Fail() // XXX 1606 1607 // Save to blockStore. 1608 if cs.blockStore.Height() < block.Height { 1609 // NOTE: the seenCommit is local justification to commit this block, 1610 // but may differ from the LastCommit included in the next block 1611 precommits := cs.Votes.Precommits(cs.CommitRound) 1612 seenCommit := precommits.MakeCommit() 1613 cs.blockStore.SaveBlock(block, blockParts, seenCommit) 1614 } else { 1615 // Happens during replay if we already saved the block but didn't commit 1616 logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) 1617 } 1618 1619 fail.Fail() // XXX 1620 1621 // Write EndHeightMessage{} for this height, implying that the blockstore 1622 // has saved the block. 1623 // 1624 // If we crash before writing this EndHeightMessage{}, we will recover by 1625 // running ApplyBlock during the ABCI handshake when we restart. If we 1626 // didn't save the block to the blockstore before writing 1627 // EndHeightMessage{}, we'd have to change WAL replay -- currently it 1628 // complains about replaying for heights where an #ENDHEIGHT entry already 1629 // exists. 1630 // 1631 // Either way, the State should not be resumed until we 1632 // successfully call ApplyBlock (ie. later here, or in Handshake after 1633 // restart). 1634 endMsg := EndHeightMessage{height} 1635 if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync 1636 panic(fmt.Sprintf( 1637 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 1638 endMsg, err, 1639 )) 1640 } 1641 1642 fail.Fail() // XXX 1643 1644 // Create a copy of the state for staging and an event cache for txs. 
1645 stateCopy := cs.state.Copy() 1646 1647 // Execute and commit the block, update and save the state, and update the mempool. 1648 // NOTE The block.AppHash wont reflect these txs until the next block. 1649 var ( 1650 err error 1651 retainHeight int64 1652 ) 1653 1654 stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( 1655 stateCopy, 1656 types.BlockID{ 1657 Hash: block.Hash(), 1658 PartSetHeader: blockParts.Header(), 1659 }, 1660 block, 1661 ) 1662 if err != nil { 1663 logger.Error("failed to apply block", "err", err) 1664 return 1665 } 1666 1667 fail.Fail() // XXX 1668 1669 // Prune old heights, if requested by ABCI app. 1670 if retainHeight > 0 { 1671 pruned, err := cs.pruneBlocks(retainHeight) 1672 if err != nil { 1673 logger.Error("failed to prune blocks", "retain_height", retainHeight, "err", err) 1674 } else { 1675 logger.Debug("pruned blocks", "pruned", pruned, "retain_height", retainHeight) 1676 } 1677 } 1678 1679 // must be called before we update state 1680 cs.recordMetrics(height, block) 1681 1682 // NewHeightStep! 1683 cs.updateToState(stateCopy) 1684 1685 fail.Fail() // XXX 1686 1687 // Private validator might have changed it's key pair => refetch pubkey. 1688 if err := cs.updatePrivValidatorPubKey(); err != nil { 1689 logger.Error("failed to get private validator pubkey", "err", err) 1690 } 1691 1692 // cs.StartTime is already set. 1693 // Schedule Round0 to start soon. 1694 cs.scheduleRound0(&cs.RoundState) 1695 1696 // By here, 1697 // * cs.Height has been increment to height+1 1698 // * cs.Step is now cstypes.RoundStepNewHeight 1699 // * cs.StartTime is set to when we will start round0. 
1700 } 1701 1702 func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { 1703 base := cs.blockStore.Base() 1704 if retainHeight <= base { 1705 return 0, nil 1706 } 1707 pruned, err := cs.blockStore.PruneBlocks(retainHeight) 1708 if err != nil { 1709 return 0, fmt.Errorf("failed to prune block store: %w", err) 1710 } 1711 err = cs.blockExec.Store().PruneStates(base, retainHeight) 1712 if err != nil { 1713 return 0, fmt.Errorf("failed to prune state database: %w", err) 1714 } 1715 return pruned, nil 1716 } 1717 1718 func (cs *State) recordMetrics(height int64, block *types.Block) { 1719 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1720 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1721 1722 var ( 1723 missingValidators int 1724 missingValidatorsPower int64 1725 ) 1726 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1727 // Remember that the first LastCommit is intentionally empty, so it's not 1728 // fair to increment missing validators number. 1729 if height > cs.state.InitialHeight { 1730 // Sanity check that commit size matches validator set size - only applies 1731 // after first block. 1732 var ( 1733 commitSize = block.LastCommit.Size() 1734 valSetLen = len(cs.LastValidators.Validators) 1735 address types.Address 1736 ) 1737 if commitSize != valSetLen { 1738 panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1739 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1740 } 1741 1742 if cs.privValidator != nil { 1743 if cs.privValidatorPubKey == nil { 1744 // Metrics won't be updated, but it's not critical. 
1745 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1746 } else { 1747 address = cs.privValidatorPubKey.Address() 1748 } 1749 } 1750 1751 for i, val := range cs.LastValidators.Validators { 1752 commitSig := block.LastCommit.Signatures[i] 1753 if commitSig.Absent() { 1754 missingValidators++ 1755 missingValidatorsPower += val.VotingPower 1756 } 1757 1758 if bytes.Equal(val.Address, address) { 1759 label := []string{ 1760 "validator_address", val.Address.String(), 1761 } 1762 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1763 if commitSig.ForBlock() { 1764 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1765 } else { 1766 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1767 } 1768 } 1769 1770 } 1771 } 1772 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1773 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1774 1775 // NOTE: byzantine validators power and count is only for consensus evidence i.e. 
duplicate vote 1776 var ( 1777 byzantineValidatorsPower = int64(0) 1778 byzantineValidatorsCount = int64(0) 1779 ) 1780 for _, ev := range block.Evidence.Evidence { 1781 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1782 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1783 byzantineValidatorsCount++ 1784 byzantineValidatorsPower += val.VotingPower 1785 } 1786 } 1787 } 1788 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1789 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1790 1791 if height > 1 { 1792 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1793 if lastBlockMeta != nil { 1794 cs.metrics.BlockIntervalSeconds.Observe( 1795 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1796 ) 1797 } 1798 } 1799 1800 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1801 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1802 cs.metrics.BlockSizeBytes.Set(float64(block.Size())) 1803 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1804 } 1805 1806 //----------------------------------------------------------------------------- 1807 1808 func (cs *State) defaultSetProposal(proposal *types.Proposal) error { 1809 // Already have one 1810 // TODO: possibly catch double proposals 1811 if cs.Proposal != nil { 1812 return nil 1813 } 1814 1815 // Does not apply 1816 if proposal.Height != cs.Height || proposal.Round != cs.Round { 1817 return nil 1818 } 1819 1820 // Verify POLRound, which must be -1 or in range [0, proposal.Round). 
1821 if proposal.POLRound < -1 || 1822 (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { 1823 return ErrInvalidProposalPOLRound 1824 } 1825 1826 p := proposal.ToProto() 1827 // Verify signature 1828 if !cs.Validators.GetProposer().PubKey.VerifySignature( 1829 types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature, 1830 ) { 1831 return ErrInvalidProposalSignature 1832 } 1833 1834 proposal.Signature = p.Signature 1835 cs.Proposal = proposal 1836 // We don't update cs.ProposalBlockParts if it is already set. 1837 // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. 1838 // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! 1839 if cs.ProposalBlockParts == nil { 1840 cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) 1841 } 1842 1843 cs.Logger.Info("received proposal", "proposal", proposal) 1844 return nil 1845 } 1846 1847 // NOTE: block is not necessarily valid. 1848 // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, 1849 // once we have the full block. 1850 func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (added bool, err error) { 1851 height, round, part := msg.Height, msg.Round, msg.Part 1852 1853 // Blocks might be reused, so round mismatch is OK 1854 if cs.Height != height { 1855 cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) 1856 return false, nil 1857 } 1858 1859 // We're not expecting a block part. 1860 if cs.ProposalBlockParts == nil { 1861 // NOTE: this can happen when we've gone to a higher round and 1862 // then receive parts from the previous round - not necessarily a bad peer. 
1863 cs.Logger.Debug( 1864 "received a block part when we are not expecting any", 1865 "height", height, 1866 "round", round, 1867 "index", part.Index, 1868 "peer", peerID, 1869 ) 1870 return false, nil 1871 } 1872 1873 added, err = cs.ProposalBlockParts.AddPart(part) 1874 if err != nil { 1875 return added, err 1876 } 1877 if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { 1878 return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", 1879 cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, 1880 ) 1881 } 1882 if added && cs.ProposalBlockParts.IsComplete() { 1883 bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader()) 1884 if err != nil { 1885 return added, err 1886 } 1887 1888 pbb := new(tmproto.Block) 1889 err = proto.Unmarshal(bz, pbb) 1890 if err != nil { 1891 return added, err 1892 } 1893 1894 block, err := types.BlockFromProto(pbb) 1895 if err != nil { 1896 return added, err 1897 } 1898 1899 cs.ProposalBlock = block 1900 1901 // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal 1902 cs.Logger.Info("received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) 1903 1904 if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { 1905 cs.Logger.Error("failed publishing event complete proposal", "err", err) 1906 } 1907 } 1908 return added, nil 1909 } 1910 1911 func (cs *State) handleCompleteProposal(blockHeight int64) { 1912 // Update Valid* if we can. 
1913 prevotes := cs.Votes.Prevotes(cs.Round) 1914 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 1915 if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { 1916 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1917 cs.Logger.Debug( 1918 "updating valid block to new proposal block", 1919 "valid_round", cs.Round, 1920 "valid_block_hash", log.NewLazyBlockHash(cs.ProposalBlock), 1921 ) 1922 1923 cs.ValidRound = cs.Round 1924 cs.ValidBlock = cs.ProposalBlock 1925 cs.ValidBlockParts = cs.ProposalBlockParts 1926 } 1927 // TODO: In case there is +2/3 majority in Prevotes set for some 1928 // block and cs.ProposalBlock contains different block, either 1929 // proposer is faulty or voting power of faulty processes is more 1930 // than 1/3. We should trigger in the future accountability 1931 // procedure at this point. 1932 } 1933 1934 if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { 1935 // Move onto the next step 1936 cs.enterPrevote(blockHeight, cs.Round) 1937 if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added 1938 cs.enterPrecommit(blockHeight, cs.Round) 1939 } 1940 } else if cs.Step == cstypes.RoundStepCommit { 1941 // If we're waiting on the proposal block... 1942 cs.tryFinalizeCommit(blockHeight) 1943 } 1944 } 1945 1946 // Attempt to add the vote. if its a duplicate signature, dupeout the validator 1947 func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { 1948 added, err := cs.addVote(vote, peerID) 1949 if err != nil { 1950 // If the vote height is off, we'll just ignore it, 1951 // But if it's a conflicting sig, add it to the cs.evpool. 1952 // If it's otherwise invalid, punish peer. 
1953 //nolint: gocritic 1954 if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { 1955 if cs.privValidatorPubKey == nil { 1956 return false, errPubKeyIsNotSet 1957 } 1958 1959 if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { 1960 cs.Logger.Error( 1961 "found conflicting vote from ourselves; did you unsafe_reset a validator?", 1962 "height", vote.Height, 1963 "round", vote.Round, 1964 "type", vote.Type, 1965 ) 1966 1967 return added, err 1968 } 1969 1970 // report conflicting votes to the evidence pool 1971 cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) 1972 cs.Logger.Debug( 1973 "found and sent conflicting votes to the evidence pool", 1974 "vote_a", voteErr.VoteA, 1975 "vote_b", voteErr.VoteB, 1976 ) 1977 1978 return added, err 1979 } else if errors.Is(err, types.ErrVoteNonDeterministicSignature) { 1980 cs.Logger.Debug("vote has non-deterministic signature", "err", err) 1981 } else { 1982 // Either 1983 // 1) bad peer OR 1984 // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR 1985 // 3) tmkms use with multiple validators connecting to a single tmkms instance 1986 // (https://github.com/vipernet-xyz/tm/issues/3839). 1987 cs.Logger.Info("failed attempting to add vote", "err", err) 1988 return added, ErrAddingVote 1989 } 1990 } 1991 1992 return added, nil 1993 } 1994 1995 func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 1996 cs.Logger.Debug( 1997 "adding vote", 1998 "vote_height", vote.Height, 1999 "vote_type", vote.Type, 2000 "val_index", vote.ValidatorIndex, 2001 "cs_height", cs.Height, 2002 ) 2003 2004 // A precommit for the previous height? 
2005 // These come in while we wait timeoutCommit 2006 if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { 2007 if cs.Step != cstypes.RoundStepNewHeight { 2008 // Late precommit at prior height is ignored 2009 cs.Logger.Debug("precommit vote came in after commit timeout and has been ignored", "vote", vote) 2010 return 2011 } 2012 2013 added, err = cs.LastCommit.AddVote(vote) 2014 if !added { 2015 return 2016 } 2017 2018 cs.Logger.Debug("added vote to last precommits", "last_commit", cs.LastCommit.StringShort()) 2019 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2020 return added, err 2021 } 2022 2023 cs.evsw.FireEvent(types.EventVote, vote) 2024 2025 // if we can skip timeoutCommit and have all the votes now, 2026 if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { 2027 // go straight to new round (skip timeout commit) 2028 // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) 2029 cs.enterNewRound(cs.Height, 0) 2030 } 2031 2032 return 2033 } 2034 2035 // Height mismatch is ignored. 2036 // Not necessarily a bad peer, but not favourable behaviour. 
2037 if vote.Height != cs.Height { 2038 cs.Logger.Debug("vote ignored and not added", "vote_height", vote.Height, "cs_height", cs.Height, "peer", peerID) 2039 return 2040 } 2041 2042 height := cs.Height 2043 added, err = cs.Votes.AddVote(vote, peerID) 2044 if !added { 2045 // Either duplicate, or error upon cs.Votes.AddByIndex() 2046 return 2047 } 2048 2049 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2050 return added, err 2051 } 2052 cs.evsw.FireEvent(types.EventVote, vote) 2053 2054 switch vote.Type { 2055 case tmproto.PrevoteType: 2056 prevotes := cs.Votes.Prevotes(vote.Round) 2057 cs.Logger.Debug("added vote to prevote", "vote", vote, "prevotes", prevotes.StringShort()) 2058 2059 // If +2/3 prevotes for a block or nil for *any* round: 2060 if blockID, ok := prevotes.TwoThirdsMajority(); ok { 2061 // There was a polka! 2062 // If we're locked but this is a recent polka, unlock. 2063 // If it matches our ProposalBlock, update the ValidBlock 2064 2065 // Unlock if `cs.LockedRound < vote.Round <= cs.Round` 2066 // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round 2067 if (cs.LockedBlock != nil) && 2068 (cs.LockedRound < vote.Round) && 2069 (vote.Round <= cs.Round) && 2070 !cs.LockedBlock.HashesTo(blockID.Hash) { 2071 2072 cs.Logger.Debug("unlocking because of POL", "locked_round", cs.LockedRound, "pol_round", vote.Round) 2073 2074 cs.LockedRound = -1 2075 cs.LockedBlock = nil 2076 cs.LockedBlockParts = nil 2077 2078 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 2079 return added, err 2080 } 2081 } 2082 2083 // Update Valid* if we can. 2084 // NOTE: our proposal block may be nil or not what received a polka.. 
2085 if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { 2086 if cs.ProposalBlock.HashesTo(blockID.Hash) { 2087 cs.Logger.Debug("updating valid block because of POL", "valid_round", cs.ValidRound, "pol_round", vote.Round) 2088 cs.ValidRound = vote.Round 2089 cs.ValidBlock = cs.ProposalBlock 2090 cs.ValidBlockParts = cs.ProposalBlockParts 2091 } else { 2092 cs.Logger.Debug( 2093 "valid block we do not know about; set ProposalBlock=nil", 2094 "proposal", log.NewLazyBlockHash(cs.ProposalBlock), 2095 "block_id", blockID.Hash, 2096 ) 2097 2098 // we're getting the wrong block 2099 cs.ProposalBlock = nil 2100 } 2101 2102 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 2103 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 2104 } 2105 2106 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 2107 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 2108 return added, err 2109 } 2110 } 2111 } 2112 2113 // If +2/3 prevotes for *anything* for future round: 2114 switch { 2115 case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): 2116 // Round-skip if there is any 2/3+ of votes ahead of us 2117 cs.enterNewRound(height, vote.Round) 2118 2119 case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round 2120 blockID, ok := prevotes.TwoThirdsMajority() 2121 if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { 2122 cs.enterPrecommit(height, vote.Round) 2123 } else if prevotes.HasTwoThirdsAny() { 2124 cs.enterPrevoteWait(height, vote.Round) 2125 } 2126 2127 case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: 2128 // If the proposal is now complete, enter prevote of cs.Round. 
2129 if cs.isProposalComplete() { 2130 cs.enterPrevote(height, cs.Round) 2131 } 2132 } 2133 2134 case tmproto.PrecommitType: 2135 precommits := cs.Votes.Precommits(vote.Round) 2136 cs.Logger.Debug("added vote to precommit", 2137 "height", vote.Height, 2138 "round", vote.Round, 2139 "validator", vote.ValidatorAddress.String(), 2140 "vote_timestamp", vote.Timestamp, 2141 "data", precommits.LogString()) 2142 2143 blockID, ok := precommits.TwoThirdsMajority() 2144 if ok { 2145 // Executed as TwoThirdsMajority could be from a higher round 2146 cs.enterNewRound(height, vote.Round) 2147 cs.enterPrecommit(height, vote.Round) 2148 2149 if len(blockID.Hash) != 0 { 2150 cs.enterCommit(height, vote.Round) 2151 if cs.config.SkipTimeoutCommit && precommits.HasAll() { 2152 cs.enterNewRound(cs.Height, 0) 2153 } 2154 } else { 2155 cs.enterPrecommitWait(height, vote.Round) 2156 } 2157 } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { 2158 cs.enterNewRound(height, vote.Round) 2159 cs.enterPrecommitWait(height, vote.Round) 2160 } 2161 2162 default: 2163 panic(fmt.Sprintf("unexpected vote type %v", vote.Type)) 2164 } 2165 2166 return added, err 2167 } 2168 2169 // CONTRACT: cs.privValidator is not nil. 2170 func (cs *State) signVote( 2171 msgType tmproto.SignedMsgType, 2172 hash []byte, 2173 header types.PartSetHeader, 2174 ) (*types.Vote, error) { 2175 // Flush the WAL. Otherwise, we may not recompute the same vote to sign, 2176 // and the privValidator will refuse to sign anything. 
2177 if err := cs.wal.FlushAndSync(); err != nil { 2178 return nil, err 2179 } 2180 2181 if cs.privValidatorPubKey == nil { 2182 return nil, errPubKeyIsNotSet 2183 } 2184 2185 addr := cs.privValidatorPubKey.Address() 2186 valIdx, _ := cs.Validators.GetByAddress(addr) 2187 2188 vote := &types.Vote{ 2189 ValidatorAddress: addr, 2190 ValidatorIndex: valIdx, 2191 Height: cs.Height, 2192 Round: cs.Round, 2193 Timestamp: cs.voteTime(), 2194 Type: msgType, 2195 BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, 2196 } 2197 2198 v := vote.ToProto() 2199 err := cs.privValidator.SignVote(cs.state.ChainID, v) 2200 vote.Signature = v.Signature 2201 vote.Timestamp = v.Timestamp 2202 2203 return vote, err 2204 } 2205 2206 func (cs *State) voteTime() time.Time { 2207 now := tmtime.Now() 2208 minVoteTime := now 2209 // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, 2210 // even if cs.LockedBlock != nil. See https://github.com/vipernet-xyz/tm/tree/v0.34.x/spec/. 2211 timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond 2212 if cs.LockedBlock != nil { 2213 // See the BFT time spec 2214 // https://github.com/vipernet-xyz/tm/blob/v0.34.x/spec/consensus/bft-time.md 2215 minVoteTime = cs.LockedBlock.Time.Add(timeIota) 2216 } else if cs.ProposalBlock != nil { 2217 minVoteTime = cs.ProposalBlock.Time.Add(timeIota) 2218 } 2219 2220 if now.After(minVoteTime) { 2221 return now 2222 } 2223 return minVoteTime 2224 } 2225 2226 // sign the vote and publish on internalMsgQueue 2227 func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { 2228 if cs.privValidator == nil { // the node does not have a key 2229 return nil 2230 } 2231 2232 if cs.privValidatorPubKey == nil { 2233 // Vote won't be signed, but it's not critical. 
2234 cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) 2235 return nil 2236 } 2237 2238 // If the node not in the validator set, do nothing. 2239 if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { 2240 return nil 2241 } 2242 2243 // TODO: pass pubKey to signVote 2244 vote, err := cs.signVote(msgType, hash, header) 2245 if err == nil { 2246 cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""}) 2247 cs.Logger.Debug("signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) 2248 return vote 2249 } 2250 2251 cs.Logger.Error("failed signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) 2252 return nil 2253 } 2254 2255 // updatePrivValidatorPubKey get's the private validator public key and 2256 // memoizes it. This func returns an error if the private validator is not 2257 // responding or responds with an error. 2258 func (cs *State) updatePrivValidatorPubKey() error { 2259 if cs.privValidator == nil { 2260 return nil 2261 } 2262 2263 pubKey, err := cs.privValidator.GetPubKey() 2264 if err != nil { 2265 return err 2266 } 2267 cs.privValidatorPubKey = pubKey 2268 return nil 2269 } 2270 2271 // look back to check existence of the node's consensus votes before joining consensus 2272 func (cs *State) checkDoubleSigningRisk(height int64) error { 2273 if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { 2274 valAddr := cs.privValidatorPubKey.Address() 2275 doubleSignCheckHeight := cs.config.DoubleSignCheckHeight 2276 if doubleSignCheckHeight > height { 2277 doubleSignCheckHeight = height 2278 } 2279 2280 for i := int64(1); i < doubleSignCheckHeight; i++ { 2281 lastCommit := cs.blockStore.LoadSeenCommit(height - i) 2282 if lastCommit != nil { 2283 for sigIdx, s := range lastCommit.Signatures { 2284 if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { 2285 cs.Logger.Info("found signature from the same 
key", "sig", s, "idx", sigIdx, "height", height-i) 2286 return ErrSignatureFoundInPastBlocks 2287 } 2288 } 2289 } 2290 } 2291 } 2292 2293 return nil 2294 } 2295 2296 func (cs *State) calculatePrevoteMessageDelayMetrics() { 2297 if cs.Proposal == nil { 2298 return 2299 } 2300 2301 ps := cs.Votes.Prevotes(cs.Round) 2302 pl := ps.List() 2303 2304 sort.Slice(pl, func(i, j int) bool { 2305 return pl[i].Timestamp.Before(pl[j].Timestamp) 2306 }) 2307 2308 var votingPowerSeen int64 2309 for _, v := range pl { 2310 _, val := cs.Validators.GetByAddress(v.ValidatorAddress) 2311 votingPowerSeen += val.VotingPower 2312 if votingPowerSeen >= cs.Validators.TotalVotingPower()*2/3+1 { 2313 cs.metrics.QuorumPrevoteMessageDelay.Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) 2314 break 2315 } 2316 } 2317 if ps.HasAll() { 2318 cs.metrics.FullPrevoteMessageDelay.Set(pl[len(pl)-1].Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) 2319 } 2320 } 2321 2322 //--------------------------------------------------------- 2323 2324 func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 2325 if h1 < h2 { 2326 return -1 2327 } else if h1 > h2 { 2328 return 1 2329 } 2330 if r1 < r2 { 2331 return -1 2332 } else if r1 > r2 { 2333 return 1 2334 } 2335 if s1 < s2 { 2336 return -1 2337 } else if s1 > s2 { 2338 return 1 2339 } 2340 return 0 2341 } 2342 2343 // repairWalFile decodes messages from src (until the decoder errors) and 2344 // writes them to dst. 
2345 func repairWalFile(src, dst string) error { 2346 in, err := os.Open(src) 2347 if err != nil { 2348 return err 2349 } 2350 defer in.Close() 2351 2352 out, err := os.Create(dst) 2353 if err != nil { 2354 return err 2355 } 2356 defer out.Close() 2357 2358 var ( 2359 dec = NewWALDecoder(in) 2360 enc = NewWALEncoder(out) 2361 ) 2362 2363 // best-case repair (until first error is encountered) 2364 for { 2365 msg, err := dec.Decode() 2366 if err != nil { 2367 break 2368 } 2369 2370 err = enc.Encode(msg) 2371 if err != nil { 2372 return fmt.Errorf("failed to encode msg: %w", err) 2373 } 2374 } 2375 2376 return nil 2377 }