// Source: github.com/noirx94/tendermintmp@v0.0.1/consensus/state.go

package consensus

import (
	"bytes"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"runtime/debug"
	"time"

	"github.com/gogo/protobuf/proto"

	cfg "github.com/tendermint/tendermint/config"
	cstypes "github.com/tendermint/tendermint/consensus/types"
	"github.com/tendermint/tendermint/crypto"
	tmevents "github.com/tendermint/tendermint/libs/events"
	"github.com/tendermint/tendermint/libs/fail"
	tmjson "github.com/tendermint/tendermint/libs/json"
	"github.com/tendermint/tendermint/libs/log"
	tmmath "github.com/tendermint/tendermint/libs/math"
	tmos "github.com/tendermint/tendermint/libs/os"
	"github.com/tendermint/tendermint/libs/service"
	tmsync "github.com/tendermint/tendermint/libs/sync"
	"github.com/tendermint/tendermint/p2p"
	tmproto "github.com/tendermint/tendermint/proto/tendermint/types"
	sm "github.com/tendermint/tendermint/state"
	"github.com/tendermint/tendermint/types"
	tmtime "github.com/tendermint/tendermint/types/time"
)

// Consensus sentinel errors
var (
	ErrInvalidProposalSignature   = errors.New("error invalid proposal signature")
	ErrInvalidProposalPOLRound    = errors.New("error invalid proposal POL round")
	ErrAddingVote                 = errors.New("error adding vote")
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	// errPubKeyIsNotSet is logged by enterPropose when privValidatorPubKey is nil.
	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)

// msgQueueSize is the buffer capacity NewState gives to each of the peer,
// internal and stats message channels.
var msgQueueSize = 1000

// msgs from the reactor which may update the state.
// An empty PeerID marks a message as internally generated (see AddVote,
// SetProposal and AddProposalBlockPart).
type msgInfo struct {
	Msg    Message `json:"msg"`
	PeerID p2p.ID  `json:"peer_key"`
}

// internally generated messages which may update the state
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}

// String renders the timeout as "<duration> ; <height>/<round> <step>".
func (ti *timeoutInfo) String() string {
	return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
}

// interface to the mempool
type txNotifier interface {
	// TxsAvailable returns the channel that receiveRoutine selects on to learn
	// that transactions are available.
	TxsAvailable() <-chan struct{}
}

// interface to the evidence pool
type evidencePool interface {
	// reports conflicting votes to the evidence pool to be processed into evidence
	ReportConflictingVotes(voteA, voteB *types.Vote)
}

// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state
	mtx tmsync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)
	doPrevote      func(height int64, round int32)
	setProposal    func(proposal *types.Proposal) error

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *Metrics
}

// StateOption sets an optional parameter on the State.
// NewState applies the options, in order, after the State has been built.
type StateOption func(*State)

// NewState returns a new State.
148 func NewState( 149 config *cfg.ConsensusConfig, 150 state sm.State, 151 blockExec *sm.BlockExecutor, 152 blockStore sm.BlockStore, 153 txNotifier txNotifier, 154 evpool evidencePool, 155 options ...StateOption, 156 ) *State { 157 cs := &State{ 158 config: config, 159 blockExec: blockExec, 160 blockStore: blockStore, 161 txNotifier: txNotifier, 162 peerMsgQueue: make(chan msgInfo, msgQueueSize), 163 internalMsgQueue: make(chan msgInfo, msgQueueSize), 164 timeoutTicker: NewTimeoutTicker(), 165 statsMsgQueue: make(chan msgInfo, msgQueueSize), 166 done: make(chan struct{}), 167 doWALCatchup: true, 168 wal: nilWAL{}, 169 evpool: evpool, 170 evsw: tmevents.NewEventSwitch(), 171 metrics: NopMetrics(), 172 } 173 174 // set function defaults (may be overwritten before calling Start) 175 cs.decideProposal = cs.defaultDecideProposal 176 cs.doPrevote = cs.defaultDoPrevote 177 cs.setProposal = cs.defaultSetProposal 178 179 // We have no votes, so reconstruct LastCommit from SeenCommit. 180 if state.LastBlockHeight > 0 { 181 cs.reconstructLastCommit(state) 182 } 183 184 cs.updateToState(state) 185 186 // NOTE: we do not call scheduleRound0 yet, we do that upon Start() 187 188 cs.BaseService = *service.NewBaseService(nil, "State", cs) 189 for _, option := range options { 190 option(cs) 191 } 192 193 return cs 194 } 195 196 // SetLogger implements Service. 197 func (cs *State) SetLogger(l log.Logger) { 198 cs.BaseService.Logger = l 199 cs.timeoutTicker.SetLogger(l) 200 } 201 202 // SetEventBus sets event bus. 203 func (cs *State) SetEventBus(b *types.EventBus) { 204 cs.eventBus = b 205 cs.blockExec.SetEventBus(b) 206 } 207 208 // StateMetrics sets the metrics. 209 func StateMetrics(metrics *Metrics) StateOption { 210 return func(cs *State) { cs.metrics = metrics } 211 } 212 213 // String returns a string. 214 func (cs *State) String() string { 215 // better not to access shared variables 216 return "ConsensusState" 217 } 218 219 // GetState returns a copy of the chain state. 
220 func (cs *State) GetState() sm.State { 221 cs.mtx.RLock() 222 defer cs.mtx.RUnlock() 223 return cs.state.Copy() 224 } 225 226 // GetLastHeight returns the last height committed. 227 // If there were no blocks, returns 0. 228 func (cs *State) GetLastHeight() int64 { 229 cs.mtx.RLock() 230 defer cs.mtx.RUnlock() 231 return cs.RoundState.Height - 1 232 } 233 234 // GetRoundState returns a shallow copy of the internal consensus state. 235 func (cs *State) GetRoundState() *cstypes.RoundState { 236 cs.mtx.RLock() 237 rs := cs.RoundState // copy 238 cs.mtx.RUnlock() 239 return &rs 240 } 241 242 // GetRoundStateJSON returns a json of RoundState. 243 func (cs *State) GetRoundStateJSON() ([]byte, error) { 244 cs.mtx.RLock() 245 defer cs.mtx.RUnlock() 246 return tmjson.Marshal(cs.RoundState) 247 } 248 249 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 250 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 251 cs.mtx.RLock() 252 defer cs.mtx.RUnlock() 253 return tmjson.Marshal(cs.RoundState.RoundStateSimple()) 254 } 255 256 // GetValidators returns a copy of the current validators. 257 func (cs *State) GetValidators() (int64, []*types.Validator) { 258 cs.mtx.RLock() 259 defer cs.mtx.RUnlock() 260 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 261 } 262 263 // SetPrivValidator sets the private validator account for signing votes. It 264 // immediately requests pubkey and caches it. 265 func (cs *State) SetPrivValidator(priv types.PrivValidator) { 266 cs.mtx.Lock() 267 defer cs.mtx.Unlock() 268 269 cs.privValidator = priv 270 271 if err := cs.updatePrivValidatorPubKey(); err != nil { 272 cs.Logger.Error("failed to get private validator pubkey", "err", err) 273 } 274 } 275 276 // SetTimeoutTicker sets the local timer. It may be useful to overwrite for 277 // testing. 
278 func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { 279 cs.mtx.Lock() 280 cs.timeoutTicker = timeoutTicker 281 cs.mtx.Unlock() 282 } 283 284 // LoadCommit loads the commit for a given height. 285 func (cs *State) LoadCommit(height int64) *types.Commit { 286 cs.mtx.RLock() 287 defer cs.mtx.RUnlock() 288 289 if height == cs.blockStore.Height() { 290 return cs.blockStore.LoadSeenCommit(height) 291 } 292 293 return cs.blockStore.LoadBlockCommit(height) 294 } 295 296 // OnStart loads the latest state via the WAL, and starts the timeout and 297 // receive routines. 298 func (cs *State) OnStart() error { 299 // We may set the WAL in testing before calling Start, so only OpenWAL if its 300 // still the nilWAL. 301 if _, ok := cs.wal.(nilWAL); ok { 302 if err := cs.loadWalFile(); err != nil { 303 return err 304 } 305 } 306 307 // We may have lost some votes if the process crashed reload from consensus 308 // log to catchup. 309 if cs.doWALCatchup { 310 repairAttempted := false 311 312 LOOP: 313 for { 314 err := cs.catchupReplay(cs.Height) 315 switch { 316 case err == nil: 317 break LOOP 318 319 case !IsDataCorruptionError(err): 320 cs.Logger.Error("error on catchup replay; proceeding to start state anyway", "err", err) 321 break LOOP 322 323 case repairAttempted: 324 return err 325 } 326 327 cs.Logger.Error("the WAL file is corrupted; attempting repair", "err", err) 328 329 // 1) prep work 330 if err := cs.wal.Stop(); err != nil { 331 return err 332 } 333 334 repairAttempted = true 335 336 // 2) backup original WAL file 337 corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) 338 if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { 339 return err 340 } 341 342 cs.Logger.Debug("backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) 343 344 // 3) try to repair (WAL file will be overwritten!) 
345 if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { 346 cs.Logger.Error("the WAL repair failed", "err", err) 347 return err 348 } 349 350 cs.Logger.Info("successful WAL repair") 351 352 // reload WAL file 353 if err := cs.loadWalFile(); err != nil { 354 return err 355 } 356 } 357 } 358 359 if err := cs.evsw.Start(); err != nil { 360 return err 361 } 362 363 // we need the timeoutRoutine for replay so 364 // we don't block on the tick chan. 365 // NOTE: we will get a build up of garbage go routines 366 // firing on the tockChan until the receiveRoutine is started 367 // to deal with them (by that point, at most one will be valid) 368 if err := cs.timeoutTicker.Start(); err != nil { 369 return err 370 } 371 372 // Double Signing Risk Reduction 373 if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { 374 return err 375 } 376 377 // now start the receiveRoutine 378 go cs.receiveRoutine(0) 379 380 // schedule the first round! 381 // use GetRoundState so we don't race the receiveRoutine for access 382 cs.scheduleRound0(cs.GetRoundState()) 383 384 return nil 385 } 386 387 // timeoutRoutine: receive requests for timeouts on tickChan and fire timeouts on tockChan 388 // receiveRoutine: serializes processing of proposoals, block parts, votes; coordinates state transitions 389 func (cs *State) startRoutines(maxSteps int) { 390 err := cs.timeoutTicker.Start() 391 if err != nil { 392 cs.Logger.Error("failed to start timeout ticker", "err", err) 393 return 394 } 395 396 go cs.receiveRoutine(maxSteps) 397 } 398 399 // loadWalFile loads WAL data from file. It overwrites cs.wal. 400 func (cs *State) loadWalFile() error { 401 wal, err := cs.OpenWAL(cs.config.WalFile()) 402 if err != nil { 403 cs.Logger.Error("failed to load state WAL", "err", err) 404 return err 405 } 406 407 cs.wal = wal 408 return nil 409 } 410 411 // OnStop implements service.Service. 
412 func (cs *State) OnStop() { 413 if err := cs.evsw.Stop(); err != nil { 414 cs.Logger.Error("failed trying to stop eventSwitch", "error", err) 415 } 416 417 if err := cs.timeoutTicker.Stop(); err != nil { 418 cs.Logger.Error("failed trying to stop timeoutTicket", "error", err) 419 } 420 // WAL is stopped in receiveRoutine. 421 } 422 423 // Wait waits for the the main routine to return. 424 // NOTE: be sure to Stop() the event switch and drain 425 // any event channels or this may deadlock 426 func (cs *State) Wait() { 427 <-cs.done 428 } 429 430 // OpenWAL opens a file to log all consensus messages and timeouts for 431 // deterministic accountability. 432 func (cs *State) OpenWAL(walFile string) (WAL, error) { 433 wal, err := NewWAL(walFile) 434 if err != nil { 435 cs.Logger.Error("failed to open WAL", "file", walFile, "err", err) 436 return nil, err 437 } 438 439 wal.SetLogger(cs.Logger.With("wal", walFile)) 440 441 if err := wal.Start(); err != nil { 442 cs.Logger.Error("failed to start WAL", "err", err) 443 return nil, err 444 } 445 446 return wal, nil 447 } 448 449 //------------------------------------------------------------ 450 // Public interface for passing messages into the consensus state, possibly causing a state transition. 451 // If peerID == "", the msg is considered internal. 452 // Messages are added to the appropriate queue (peer or internal). 453 // If the queue is full, the function may block. 454 // TODO: should these return anything or let callers just use events? 455 456 // AddVote inputs a vote. 457 func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 458 if peerID == "" { 459 cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""} 460 } else { 461 cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID} 462 } 463 464 // TODO: wait for event?! 465 return false, nil 466 } 467 468 // SetProposal inputs a proposal. 
469 func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { 470 471 if peerID == "" { 472 cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""} 473 } else { 474 cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID} 475 } 476 477 // TODO: wait for event?! 478 return nil 479 } 480 481 // AddProposalBlockPart inputs a part of the proposal block. 482 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { 483 484 if peerID == "" { 485 cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""} 486 } else { 487 cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID} 488 } 489 490 // TODO: wait for event?! 491 return nil 492 } 493 494 // SetProposalAndBlock inputs the proposal and all block parts. 495 func (cs *State) SetProposalAndBlock( 496 proposal *types.Proposal, 497 block *types.Block, 498 parts *types.PartSet, 499 peerID p2p.ID, 500 ) error { 501 502 if err := cs.SetProposal(proposal, peerID); err != nil { 503 return err 504 } 505 506 for i := 0; i < int(parts.Total()); i++ { 507 part := parts.GetPart(i) 508 if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { 509 return err 510 } 511 } 512 513 return nil 514 } 515 516 //------------------------------------------------------------ 517 // internal functions for managing the state 518 519 func (cs *State) updateHeight(height int64) { 520 cs.metrics.Height.Set(float64(height)) 521 cs.Height = height 522 } 523 524 func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { 525 cs.Round = round 526 cs.Step = step 527 } 528 529 // enterNewRound(height, 0) at cs.StartTime. 
530 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 531 // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) 532 sleepDuration := rs.StartTime.Sub(tmtime.Now()) 533 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 534 } 535 536 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 537 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { 538 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) 539 } 540 541 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 542 func (cs *State) sendInternalMessage(mi msgInfo) { 543 select { 544 case cs.internalMsgQueue <- mi: 545 default: 546 // NOTE: using the go-routine means our votes can 547 // be processed out of order. 548 // TODO: use CList here for strict determinism and 549 // attempt push to internalMsgQueue in receiveRoutine 550 cs.Logger.Debug("internal msg queue is full; using a go-routine") 551 go func() { cs.internalMsgQueue <- mi }() 552 } 553 } 554 555 // Reconstruct LastCommit from SeenCommit, which we saved along with the block, 556 // (which happens even before saving the state) 557 func (cs *State) reconstructLastCommit(state sm.State) { 558 seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) 559 if seenCommit == nil { 560 panic(fmt.Sprintf( 561 "failed to reconstruct last commit; seen commit for height %v not found", 562 state.LastBlockHeight, 563 )) 564 } 565 566 lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) 567 if !lastPrecommits.HasTwoThirdsMajority() { 568 panic("failed to reconstruct last commit; does not have +2/3 maj") 569 } 570 571 cs.LastCommit = lastPrecommits 572 } 573 574 // Updates State and increments height to match that of state. 575 // The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. 
func (cs *State) updateToState(state sm.State) {
	// A commit round is set and we have a height: the incoming state must be
	// the one produced by committing exactly that height.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf(
			"updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight,
		))
	}

	// Consistency checks against the state we already hold (skipped on the
	// first call, when cs.state is still empty).
	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height,
			))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight,
			))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Debug(
				"ignoring updateToState()",
				"new_height", state.LastBlockHeight+1,
				"old_height", cs.state.LastBlockHeight+1,
			)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	// Decide what LastCommit should be for the new height. The cases are
	// evaluated in order; if none matches, LastCommit is left untouched.
	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf(
				"wanted to form a commit, but precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight, cs.CommitRound, cs.Votes.Precommits(cs.CommitRound),
			))
		}

		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)

	case cs.LastCommit == nil:
		// NOTE: when Tendermint starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf(
			"last commit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height
	height := state.LastBlockHeight + 1
	if height == 1 {
		// No block committed yet: start at the configured initial height.
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)

	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// An alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(tmtime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	// Clear all per-height proposal, lock and valid-block bookkeeping and
	// start a fresh vote set for the new height.
	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}

// newStep records the current round state in the WAL, bumps the step counter
// (nSteps) and, once an event bus has been set, publishes the new round step
// on the event bus and fires it on the event switch. WAL and publish failures
// are logged, not returned.
func (cs *State) newStep() {
	rs := cs.RoundStateEvent()
	if err := cs.wal.Write(rs); err != nil {
		cs.Logger.Error("failed writing to WAL", "err", err)
	}

	cs.nSteps++

	// newStep is called by updateToState in NewState before the eventBus is set!
	if cs.eventBus != nil {
		if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
			cs.Logger.Error("failed publishing new round step", "err", err)
		}

		cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState)
	}
}

//-----------------------------------------
// the main go routines

// receiveRoutine handles messages which may cause state transitions.
// its argument (maxSteps) is the number of steps (as counted by newStep) after
// which the routine exits - use 0 to run forever
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
func (cs *State) receiveRoutine(maxSteps int) {
	// onExit stops the WAL, waits for it, and closes cs.done (which unblocks
	// Wait). It runs on a quit signal and after a recovered panic.
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but its ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("failed trying to stop WAL", "error", err)
		}

		cs.wal.Wait()
		close(cs.done)
	}

	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		// Step limit (tests): exit without running onExit and reset the counter.
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Debug("reached max steps; exiting receive routine")
				cs.nSteps = 0
				return
			}
		}

		// Snapshot of the round state taken before blocking in the select;
		// handleTimeout checks incoming timeouts against this snapshot.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()

		case mi = <-cs.peerMsgQueue:
			// Peer messages: a failed WAL write is logged and processing continues.
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)

		case mi = <-cs.internalMsgQueue:
			// Our own messages: the WAL write is synchronous and a failure is fatal.
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf(
					"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
					mi, err,
				))
			}

			if _, ok := mi.Msg.(*VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)

		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)

		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}

// state transitions on complete-proposal, 2/3-any, 2/3-one
//
// handleMsg takes the write lock and dispatches on the concrete message type.
// Block parts and votes that were actually added are forwarded on
// statsMsgQueue. Processing errors are logged, never returned.
func (cs *State) handleMsg(mi msgInfo) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	var (
		added bool
		err   error
	)

	msg, peerID := mi.Msg, mi.PeerID

	switch msg := msg.(type) {
	case *ProposalMessage:
		// will not cause transition.
		// once proposal is set, we can receive block parts
		err = cs.setProposal(msg.Proposal)

	case *BlockPartMessage:
		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
		added, err = cs.addProposalBlockPart(msg, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// An error for a part from a different round is downgraded to a debug
		// log and cleared, so it is not reported as a failure below.
		if err != nil && msg.Round != cs.Round {
			cs.Logger.Debug(
				"received block part from wrong round",
				"height", cs.Height,
				"cs_round", cs.Round,
				"block_round", msg.Round,
			)
			err = nil
		}

	case *VoteMessage:
		// attempt to add the vote and dupeout the validator if its a duplicate signature
		// if the vote gives us a 2/3-any or 2/3-one, we transition
		added, err = cs.tryAddVote(msg.Vote, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// if err == ErrAddingVote {
		// TODO: punish peer
		// We probably don't want to stop the peer here. The vote does not
		// necessarily comes from a malicious peer but can be just broadcasted by
		// a typical peer.
		// https://github.com/tendermint/tendermint/issues/1281
		// }

		// NOTE: the vote is broadcast to peers by the reactor listening
		// for vote events

		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
		// the peer is sending us CatchupCommit precommits.
		// We could make note of this and help filter in broadcastHasVoteMessage().

	default:
		cs.Logger.Error("unknown msg type", "type", fmt.Sprintf("%T", msg))
		return
	}

	if err != nil {
		cs.Logger.Error(
			"failed to process message",
			"height", cs.Height,
			"round", cs.Round,
			"peer", peerID,
			"msg_type", fmt.Sprintf("%T", msg),
			"err", err,
		)
	}
}

// handleTimeout applies a fired timeout to the state machine. rs is the
// round-state snapshot the caller took before receiving the timeout; timeouts
// for another height, an earlier round, or an earlier step of the same round
// are ignored. Panics on a step that has no timeout handling.
func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
	cs.Logger.Debug("received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)

	// timeouts must be for current height, round, step
	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
		cs.Logger.Debug("ignoring tock because we are ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
		return
	}

	// the timeout will now cause a state transition
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	switch ti.Step {
	case cstypes.RoundStepNewHeight:
		// NewRound event fired from enterNewRound.
		// XXX: should we fire timeout here (for timeout commit)?
		cs.enterNewRound(ti.Height, 0)

	case cstypes.RoundStepNewRound:
		// NOTE(review): round 0 is hard-coded rather than ti.Round. The
		// NewRound timeouts scheduled in handleTxsAvailable and enterNewRound
		// are only ever for round 0 - confirm no other scheduler exists.
		cs.enterPropose(ti.Height, 0)

	case cstypes.RoundStepPropose:
		if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout propose", "err", err)
		}

		cs.enterPrevote(ti.Height, ti.Round)

	case cstypes.RoundStepPrevoteWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		cs.enterPrecommit(ti.Height, ti.Round)

	case cstypes.RoundStepPrecommitWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		// Precommit for this round, then move on to the next one.
		cs.enterPrecommit(ti.Height, ti.Round)
		cs.enterNewRound(ti.Height, ti.Round+1)

	default:
		panic(fmt.Sprintf("invalid timeout step: %v", ti.Step))
	}
}

// handleTxsAvailable reacts to the mempool signalling that transactions are
// available. It only acts in round 0: during the NewHeight step it schedules a
// NewRound timeout for the start time (unless a proof block is needed), and
// during the NewRound step it enters propose directly.
func (cs *State) handleTxsAvailable() {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	// We only need to do this for round 0.
	if cs.Round != 0 {
		return
	}

	switch cs.Step {
	case cstypes.RoundStepNewHeight: // timeoutCommit phase
		if cs.needProofBlock(cs.Height) {
			// enterPropose will be called by enterNewRound
			return
		}

		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
		timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)

	case cstypes.RoundStepNewRound: // after timeoutCommit
		cs.enterPropose(cs.Height, 0)
	}
}

//-----------------------------------------------------------------------------
// State functions
// Used internally by handleTimeout and handleMsg to make state transitions

// Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit),
// or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1)
// Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1)
// Enter: +2/3 precommits for nil at (height,round-1)
// Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round)
// NOTE: cs.StartTime was already set for height.
func (cs *State) enterNewRound(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	// Only proceed for the current height, and either a later round or the
	// current round while still in the NewHeight step.
	if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) {
		logger.Debug(
			"entering new round with invalid args",
			"current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	if now := tmtime.Now(); cs.StartTime.After(now) {
		logger.Debug("need to set a buffer and log message here for sanity", "start_time", cs.StartTime, "now", now)
	}

	logger.Debug("entering new round", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step))

	// increment validators if necessary
	validators := cs.Validators
	if cs.Round < round {
		// Work on a copy and advance proposer priority once per skipped round.
		validators = validators.Copy()
		validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round))
	}

	// Setup new round
	// we don't fire newStep for this step,
	// but we fire an event, so update the round step first
	cs.updateRoundStep(round, cstypes.RoundStepNewRound)
	cs.Validators = validators
	if round == 0 {
		// We've already reset these upon new height,
		// and meanwhile we might have received a proposal
		// for round 0.
	} else {
		logger.Debug("resetting proposal info")
		cs.Proposal = nil
		cs.ProposalBlock = nil
		cs.ProposalBlockParts = nil
	}

	cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping
	cs.TriggeredTimeoutPrecommit = false

	if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil {
		cs.Logger.Error("failed publishing new round", "err", err)
	}

	cs.metrics.Rounds.Set(float64(round))

	// Wait for txs to be available in the mempool
	// before we enterPropose in round 0. If the last block changed the app hash,
	// we may need an empty "proof" block, and enterPropose immediately.
	waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height)
	if waitForTxs {
		if cs.config.CreateEmptyBlocksInterval > 0 {
			cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round,
				cstypes.RoundStepNewRound)
		}
	} else {
		cs.enterPropose(height, round)
	}
}

// needProofBlock returns true on the first height (so the genesis app hash is signed right away)
// and where the last block (height-1) caused the app hash to change.
// It panics if the block meta for height-1 cannot be loaded from the block store.
func (cs *State) needProofBlock(height int64) bool {
	if height == cs.state.InitialHeight {
		return true
	}

	lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
	if lastBlockMeta == nil {
		panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1))
	}

	return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash)
}

// Enter (CreateEmptyBlocks): from enterNewRound(height,round)
// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ):
// after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval
// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool
func (cs *State) enterPropose(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	// Ignore calls for another height, an earlier round, or a round that has
	// already reached (or passed) the Propose step.
	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) {
		logger.Debug(
			"entering propose step with invalid args",
			"current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step),
		)
		return
	}

	logger.Debug("entering propose step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step))

	// Runs on every return path below, including the early returns.
	defer func() {
		// Done enterPropose:
		cs.updateRoundStep(round, cstypes.RoundStepPropose)
		cs.newStep()

		// If we have the whole proposal + POL, then goto Prevote now.
		// else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart),
		// or else after timeoutPropose
		if cs.isProposalComplete() {
			cs.enterPrevote(height, cs.Round)
		}
	}()

	// If we don't get the proposal and all block parts quick enough, enterPrevote
	cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose)

	// Nothing more to do if we're not a validator
	if cs.privValidator == nil {
		logger.Debug("node is not a validator")
		return
	}

	logger.Debug("node is a validator")

	if cs.privValidatorPubKey == nil {
		// If this node is a validator & proposer in the current round, it will
		// miss the opportunity to create a block.
		logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet)
		return
	}

	address := cs.privValidatorPubKey.Address()

	// if not a validator, we're done
	if !cs.Validators.HasAddress(address) {
		logger.Debug("node is not a validator", "addr", address, "vals", cs.Validators)
		return
	}

	if cs.isProposer(address) {
		logger.Debug("propose step; our turn to propose", "proposer", address)
		cs.decideProposal(height, round)
	} else {
		logger.Debug("propose step; not our turn to propose", "proposer", cs.Validators.GetProposer().Address)
	}
}

// isProposer reports whether address equals the address of the current
// validator set's proposer.
func (cs *State) isProposer(address []byte) bool {
	return bytes.Equal(cs.Validators.GetProposer().Address, address)
}

func (cs *State) defaultDecideProposal(height int64, round int32) {
	var block *types.Block
	var blockParts *types.PartSet

	// Decide on block
	if cs.ValidBlock != nil {
		// If there is valid block, choose that.
		block, blockParts = cs.ValidBlock, cs.ValidBlockParts
	} else {
		// Create a new proposal block from state/txs from the mempool.
1116 block, blockParts = cs.createProposalBlock() 1117 if block == nil { 1118 return 1119 } 1120 } 1121 1122 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1123 // and the privValidator will refuse to sign anything. 1124 if err := cs.wal.FlushAndSync(); err != nil { 1125 cs.Logger.Error("failed flushing WAL to disk") 1126 } 1127 1128 // Make proposal 1129 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1130 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1131 p := proposal.ToProto() 1132 if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { 1133 proposal.Signature = p.Signature 1134 1135 // send proposal and block parts on internal msg queue 1136 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 1137 1138 for i := 0; i < int(blockParts.Total()); i++ { 1139 part := blockParts.GetPart(i) 1140 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 1141 } 1142 1143 cs.Logger.Debug("signed proposal", "height", height, "round", round, "proposal", proposal) 1144 } else if !cs.replayMode { 1145 cs.Logger.Error("propose step; failed signing proposal", "height", height, "round", round, "err", err) 1146 } 1147 } 1148 1149 // Returns true if the proposal block is complete && 1150 // (if POLRound was proposed, we have +2/3 prevotes from there). 1151 func (cs *State) isProposalComplete() bool { 1152 if cs.Proposal == nil || cs.ProposalBlock == nil { 1153 return false 1154 } 1155 // we have the proposal. if there's a POLRound, 1156 // make sure we have the prevotes from it too 1157 if cs.Proposal.POLRound < 0 { 1158 return true 1159 } 1160 // if this is false the proposer is lying or we haven't received the POL yet 1161 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1162 1163 } 1164 1165 // Create the next block to propose and return it. Returns nil block upon error. 
1166 // 1167 // We really only need to return the parts, but the block is returned for 1168 // convenience so we can log the proposal block. 1169 // 1170 // NOTE: keep it side-effect free for clarity. 1171 // CONTRACT: cs.privValidator is not nil. 1172 func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { 1173 if cs.privValidator == nil { 1174 panic("entered createProposalBlock with privValidator being nil") 1175 } 1176 1177 var commit *types.Commit 1178 switch { 1179 case cs.Height == cs.state.InitialHeight: 1180 // We're creating a proposal for the first block. 1181 // The commit is empty, but not nil. 1182 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1183 1184 case cs.LastCommit.HasTwoThirdsMajority(): 1185 // Make the commit from LastCommit 1186 commit = cs.LastCommit.MakeCommit() 1187 1188 default: // This shouldn't happen. 1189 cs.Logger.Error("propose step; cannot propose anything without commit for the previous block") 1190 return 1191 } 1192 1193 if cs.privValidatorPubKey == nil { 1194 // If this node is a validator & proposer in the current round, it will 1195 // miss the opportunity to create a block. 1196 cs.Logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1197 return 1198 } 1199 1200 proposerAddr := cs.privValidatorPubKey.Address() 1201 1202 return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1203 } 1204 1205 // Enter: `timeoutPropose` after entering Propose. 1206 // Enter: proposal block and POL is ready. 1207 // Prevote for LockedBlock if we're locked, or ProposalBlock if valid. 1208 // Otherwise vote nil. 
1209 func (cs *State) enterPrevote(height int64, round int32) { 1210 logger := cs.Logger.With("height", height, "round", round) 1211 1212 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 1213 logger.Debug( 1214 "entering prevote step with invalid args", 1215 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1216 ) 1217 return 1218 } 1219 1220 defer func() { 1221 // Done enterPrevote: 1222 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 1223 cs.newStep() 1224 }() 1225 1226 logger.Debug("entering prevote step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1227 1228 // Sign and broadcast vote as necessary 1229 cs.doPrevote(height, round) 1230 1231 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 1232 // (so we have more time to try and collect +2/3 prevotes for a single block) 1233 } 1234 1235 func (cs *State) defaultDoPrevote(height int64, round int32) { 1236 logger := cs.Logger.With("height", height, "round", round) 1237 1238 // If a block is locked, prevote that. 1239 if cs.LockedBlock != nil { 1240 logger.Debug("prevote step; already locked on a block; prevoting locked block") 1241 cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) 1242 return 1243 } 1244 1245 // If ProposalBlock is nil, prevote nil. 1246 if cs.ProposalBlock == nil { 1247 logger.Debug("prevote step: ProposalBlock is nil") 1248 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1249 return 1250 } 1251 1252 // Validate proposal block 1253 err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) 1254 if err != nil { 1255 // ProposalBlock is invalid, prevote nil. 
1256 logger.Error("prevote step: ProposalBlock is invalid", "err", err) 1257 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1258 return 1259 } 1260 1261 // Prevote cs.ProposalBlock 1262 // NOTE: the proposal signature is validated when it is received, 1263 // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) 1264 logger.Debug("prevote step: ProposalBlock is valid") 1265 cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) 1266 } 1267 1268 // Enter: any +2/3 prevotes at next round. 1269 func (cs *State) enterPrevoteWait(height int64, round int32) { 1270 logger := cs.Logger.With("height", height, "round", round) 1271 1272 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1273 logger.Debug( 1274 "entering prevote wait step with invalid args", 1275 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1276 ) 1277 return 1278 } 1279 1280 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1281 panic(fmt.Sprintf( 1282 "entering prevote wait step (%v/%v), but prevotes does not have any +2/3 votes", 1283 height, round, 1284 )) 1285 } 1286 1287 logger.Debug("entering prevote wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1288 1289 defer func() { 1290 // Done enterPrevoteWait: 1291 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1292 cs.newStep() 1293 }() 1294 1295 // Wait for some more prevotes; enterPrecommit 1296 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1297 } 1298 1299 // Enter: `timeoutPrevote` after any +2/3 prevotes. 1300 // Enter: `timeoutPrecommit` after any +2/3 precommits. 1301 // Enter: +2/3 precomits for block or nil. 
1302 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 1303 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 1304 // else, precommit nil otherwise. 1305 func (cs *State) enterPrecommit(height int64, round int32) { 1306 logger := cs.Logger.With("height", height, "round", round) 1307 1308 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { 1309 logger.Debug( 1310 "entering precommit step with invalid args", 1311 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1312 ) 1313 return 1314 } 1315 1316 logger.Debug("entering precommit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1317 1318 defer func() { 1319 // Done enterPrecommit: 1320 cs.updateRoundStep(round, cstypes.RoundStepPrecommit) 1321 cs.newStep() 1322 }() 1323 1324 // check for a polka 1325 blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() 1326 1327 // If we don't have a polka, we must precommit nil. 1328 if !ok { 1329 if cs.LockedBlock != nil { 1330 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit while we are locked; precommitting nil") 1331 } else { 1332 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit; precommitting nil") 1333 } 1334 1335 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1336 return 1337 } 1338 1339 // At this point +2/3 prevoted for a particular block or nil. 1340 if err := cs.eventBus.PublishEventPolka(cs.RoundStateEvent()); err != nil { 1341 logger.Error("failed publishing polka", "err", err) 1342 } 1343 1344 // the latest POLRound should be this round. 1345 polRound, _ := cs.Votes.POLInfo() 1346 if polRound < round { 1347 panic(fmt.Sprintf("this POLRound should be %v but got %v", round, polRound)) 1348 } 1349 1350 // +2/3 prevoted nil. Unlock and precommit nil. 
1351 if len(blockID.Hash) == 0 { 1352 if cs.LockedBlock == nil { 1353 logger.Debug("precommit step; +2/3 prevoted for nil") 1354 } else { 1355 logger.Debug("precommit step; +2/3 prevoted for nil; unlocking") 1356 cs.LockedRound = -1 1357 cs.LockedBlock = nil 1358 cs.LockedBlockParts = nil 1359 1360 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1361 logger.Error("failed publishing event unlock", "err", err) 1362 } 1363 } 1364 1365 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1366 return 1367 } 1368 1369 // At this point, +2/3 prevoted for a particular block. 1370 1371 // If we're already locked on that block, precommit it, and update the LockedRound 1372 if cs.LockedBlock.HashesTo(blockID.Hash) { 1373 logger.Debug("precommit step; +2/3 prevoted locked block; relocking") 1374 cs.LockedRound = round 1375 1376 if err := cs.eventBus.PublishEventRelock(cs.RoundStateEvent()); err != nil { 1377 logger.Error("failed publishing event relock", "err", err) 1378 } 1379 1380 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1381 return 1382 } 1383 1384 // If +2/3 prevoted for proposal block, stage and precommit it 1385 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1386 logger.Debug("precommit step; +2/3 prevoted proposal block; locking", "hash", blockID.Hash) 1387 1388 // Validate the block. 
1389 if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil { 1390 panic(fmt.Sprintf("precommit step; +2/3 prevoted for an invalid block: %v", err)) 1391 } 1392 1393 cs.LockedRound = round 1394 cs.LockedBlock = cs.ProposalBlock 1395 cs.LockedBlockParts = cs.ProposalBlockParts 1396 1397 if err := cs.eventBus.PublishEventLock(cs.RoundStateEvent()); err != nil { 1398 logger.Error("failed publishing event lock", "err", err) 1399 } 1400 1401 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1402 return 1403 } 1404 1405 // There was a polka in this round for a block we don't have. 1406 // Fetch that block, unlock, and precommit nil. 1407 // The +2/3 prevotes for this round is the POL for our unlock. 1408 logger.Debug("precommit step; +2/3 prevotes for a block we do not have; voting nil", "block_id", blockID) 1409 1410 cs.LockedRound = -1 1411 cs.LockedBlock = nil 1412 cs.LockedBlockParts = nil 1413 1414 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1415 cs.ProposalBlock = nil 1416 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1417 } 1418 1419 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1420 logger.Error("failed publishing event unlock", "err", err) 1421 } 1422 1423 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1424 } 1425 1426 // Enter: any +2/3 precommits for next round. 
1427 func (cs *State) enterPrecommitWait(height int64, round int32) { 1428 logger := cs.Logger.With("height", height, "round", round) 1429 1430 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1431 logger.Debug( 1432 "entering precommit wait step with invalid args", 1433 "triggered_timeout", cs.TriggeredTimeoutPrecommit, 1434 "current", fmt.Sprintf("%v/%v", cs.Height, cs.Round), 1435 ) 1436 return 1437 } 1438 1439 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1440 panic(fmt.Sprintf( 1441 "entering precommit wait step (%v/%v), but precommits does not have any +2/3 votes", 1442 height, round, 1443 )) 1444 } 1445 1446 logger.Debug("entering precommit wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1447 1448 defer func() { 1449 // Done enterPrecommitWait: 1450 cs.TriggeredTimeoutPrecommit = true 1451 cs.newStep() 1452 }() 1453 1454 // wait for some more precommits; enterNewRound 1455 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1456 } 1457 1458 // Enter: +2/3 precommits for block 1459 func (cs *State) enterCommit(height int64, commitRound int32) { 1460 logger := cs.Logger.With("height", height, "commit_round", commitRound) 1461 1462 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1463 logger.Debug( 1464 "entering commit step with invalid args", 1465 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1466 ) 1467 return 1468 } 1469 1470 logger.Debug("entering commit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1471 1472 defer func() { 1473 // Done enterCommit: 1474 // keep cs.Round the same, commitRound points to the right Precommits set. 1475 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1476 cs.CommitRound = commitRound 1477 cs.CommitTime = tmtime.Now() 1478 cs.newStep() 1479 1480 // Maybe finalize immediately. 
1481 cs.tryFinalizeCommit(height) 1482 }() 1483 1484 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1485 if !ok { 1486 panic("RunActionCommit() expects +2/3 precommits") 1487 } 1488 1489 // The Locked* fields no longer matter. 1490 // Move them over to ProposalBlock if they match the commit hash, 1491 // otherwise they'll be cleared in updateToState. 1492 if cs.LockedBlock.HashesTo(blockID.Hash) { 1493 logger.Debug("commit is for a locked block; set ProposalBlock=LockedBlock", "block_hash", blockID.Hash) 1494 cs.ProposalBlock = cs.LockedBlock 1495 cs.ProposalBlockParts = cs.LockedBlockParts 1496 } 1497 1498 // If we don't have the block being committed, set up to get it. 1499 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1500 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1501 logger.Info( 1502 "commit is for a block we do not know about; set ProposalBlock=nil", 1503 "proposal", cs.ProposalBlock.Hash(), 1504 "commit", blockID.Hash, 1505 ) 1506 1507 // We're getting the wrong block. 1508 // Set up ProposalBlockParts and keep waiting. 1509 cs.ProposalBlock = nil 1510 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1511 1512 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1513 logger.Error("failed publishing valid block", "err", err) 1514 } 1515 1516 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 1517 } 1518 } 1519 } 1520 1521 // If we have the block AND +2/3 commits for it, finalize. 
1522 func (cs *State) tryFinalizeCommit(height int64) { 1523 logger := cs.Logger.With("height", height) 1524 1525 if cs.Height != height { 1526 panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) 1527 } 1528 1529 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1530 if !ok || len(blockID.Hash) == 0 { 1531 logger.Error("failed attempt to finalize commit; there was no +2/3 majority or +2/3 was for nil") 1532 return 1533 } 1534 1535 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1536 // TODO: this happens every time if we're not a validator (ugly logs) 1537 // TODO: ^^ wait, why does it matter that we're a validator? 1538 logger.Debug( 1539 "failed attempt to finalize commit; we do not have the commit block", 1540 "proposal_block", cs.ProposalBlock.Hash(), 1541 "commit_block", blockID.Hash, 1542 ) 1543 return 1544 } 1545 1546 cs.finalizeCommit(height) 1547 } 1548 1549 // Increment height and goto cstypes.RoundStepNewHeight 1550 func (cs *State) finalizeCommit(height int64) { 1551 logger := cs.Logger.With("height", height) 1552 1553 if cs.Height != height || cs.Step != cstypes.RoundStepCommit { 1554 logger.Debug( 1555 "entering finalize commit step", 1556 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1557 ) 1558 return 1559 } 1560 1561 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1562 block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts 1563 1564 if !ok { 1565 panic("cannot finalize commit; commit does not have 2/3 majority") 1566 } 1567 if !blockParts.HasHeader(blockID.PartSetHeader) { 1568 panic("expected ProposalBlockParts header to be commit header") 1569 } 1570 if !block.HashesTo(blockID.Hash) { 1571 panic("cannot finalize commit; proposal block does not hash to commit hash") 1572 } 1573 1574 if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { 1575 panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) 1576 } 1577 1578 logger.Info( 
1579 "finalizing commit of block", 1580 "hash", block.Hash(), 1581 "root", block.AppHash, 1582 "num_txs", len(block.Txs), 1583 ) 1584 logger.Debug(fmt.Sprintf("%v", block)) 1585 1586 fail.Fail() // XXX 1587 1588 // Save to blockStore. 1589 if cs.blockStore.Height() < block.Height { 1590 // NOTE: the seenCommit is local justification to commit this block, 1591 // but may differ from the LastCommit included in the next block 1592 precommits := cs.Votes.Precommits(cs.CommitRound) 1593 seenCommit := precommits.MakeCommit() 1594 cs.blockStore.SaveBlock(block, blockParts, seenCommit) 1595 } else { 1596 // Happens during replay if we already saved the block but didn't commit 1597 logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) 1598 } 1599 1600 fail.Fail() // XXX 1601 1602 // Write EndHeightMessage{} for this height, implying that the blockstore 1603 // has saved the block. 1604 // 1605 // If we crash before writing this EndHeightMessage{}, we will recover by 1606 // running ApplyBlock during the ABCI handshake when we restart. If we 1607 // didn't save the block to the blockstore before writing 1608 // EndHeightMessage{}, we'd have to change WAL replay -- currently it 1609 // complains about replaying for heights where an #ENDHEIGHT entry already 1610 // exists. 1611 // 1612 // Either way, the State should not be resumed until we 1613 // successfully call ApplyBlock (ie. later here, or in Handshake after 1614 // restart). 1615 endMsg := EndHeightMessage{height} 1616 if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync 1617 panic(fmt.Sprintf( 1618 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 1619 endMsg, err, 1620 )) 1621 } 1622 1623 fail.Fail() // XXX 1624 1625 // Create a copy of the state for staging and an event cache for txs. 1626 stateCopy := cs.state.Copy() 1627 1628 // Execute and commit the block, update and save the state, and update the mempool. 
1629 // NOTE The block.AppHash wont reflect these txs until the next block. 1630 var ( 1631 err error 1632 retainHeight int64 1633 ) 1634 1635 stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( 1636 stateCopy, 1637 types.BlockID{ 1638 Hash: block.Hash(), 1639 PartSetHeader: blockParts.Header(), 1640 }, 1641 block, 1642 ) 1643 if err != nil { 1644 logger.Error("failed to apply block", "err", err) 1645 return 1646 } 1647 1648 fail.Fail() // XXX 1649 1650 // Prune old heights, if requested by ABCI app. 1651 if retainHeight > 0 { 1652 pruned, err := cs.pruneBlocks(retainHeight) 1653 if err != nil { 1654 logger.Error("failed to prune blocks", "retain_height", retainHeight, "err", err) 1655 } else { 1656 logger.Debug("pruned blocks", "pruned", pruned, "retain_height", retainHeight) 1657 } 1658 } 1659 1660 // must be called before we update state 1661 cs.recordMetrics(height, block) 1662 1663 // NewHeightStep! 1664 cs.updateToState(stateCopy) 1665 1666 fail.Fail() // XXX 1667 1668 // Private validator might have changed it's key pair => refetch pubkey. 1669 if err := cs.updatePrivValidatorPubKey(); err != nil { 1670 logger.Error("failed to get private validator pubkey", "err", err) 1671 } 1672 1673 // cs.StartTime is already set. 1674 // Schedule Round0 to start soon. 1675 cs.scheduleRound0(&cs.RoundState) 1676 1677 // By here, 1678 // * cs.Height has been increment to height+1 1679 // * cs.Step is now cstypes.RoundStepNewHeight 1680 // * cs.StartTime is set to when we will start round0. 
1681 } 1682 1683 func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { 1684 base := cs.blockStore.Base() 1685 if retainHeight <= base { 1686 return 0, nil 1687 } 1688 pruned, err := cs.blockStore.PruneBlocks(retainHeight) 1689 if err != nil { 1690 return 0, fmt.Errorf("failed to prune block store: %w", err) 1691 } 1692 err = cs.blockExec.Store().PruneStates(base, retainHeight) 1693 if err != nil { 1694 return 0, fmt.Errorf("failed to prune state database: %w", err) 1695 } 1696 return pruned, nil 1697 } 1698 1699 func (cs *State) recordMetrics(height int64, block *types.Block) { 1700 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1701 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1702 1703 var ( 1704 missingValidators int 1705 missingValidatorsPower int64 1706 ) 1707 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1708 // Remember that the first LastCommit is intentionally empty, so it's not 1709 // fair to increment missing validators number. 1710 if height > cs.state.InitialHeight { 1711 // Sanity check that commit size matches validator set size - only applies 1712 // after first block. 1713 var ( 1714 commitSize = block.LastCommit.Size() 1715 valSetLen = len(cs.LastValidators.Validators) 1716 address types.Address 1717 ) 1718 if commitSize != valSetLen { 1719 panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1720 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1721 } 1722 1723 if cs.privValidator != nil { 1724 if cs.privValidatorPubKey == nil { 1725 // Metrics won't be updated, but it's not critical. 
1726 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1727 } else { 1728 address = cs.privValidatorPubKey.Address() 1729 } 1730 } 1731 1732 for i, val := range cs.LastValidators.Validators { 1733 commitSig := block.LastCommit.Signatures[i] 1734 if commitSig.Absent() { 1735 missingValidators++ 1736 missingValidatorsPower += val.VotingPower 1737 } 1738 1739 if bytes.Equal(val.Address, address) { 1740 label := []string{ 1741 "validator_address", val.Address.String(), 1742 } 1743 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1744 if commitSig.ForBlock() { 1745 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1746 } else { 1747 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1748 } 1749 } 1750 1751 } 1752 } 1753 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1754 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1755 1756 // NOTE: byzantine validators power and count is only for consensus evidence i.e. 
duplicate vote 1757 var ( 1758 byzantineValidatorsPower = int64(0) 1759 byzantineValidatorsCount = int64(0) 1760 ) 1761 for _, ev := range block.Evidence.Evidence { 1762 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1763 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1764 byzantineValidatorsCount++ 1765 byzantineValidatorsPower += val.VotingPower 1766 } 1767 } 1768 } 1769 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1770 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1771 1772 if height > 1 { 1773 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1774 if lastBlockMeta != nil { 1775 cs.metrics.BlockIntervalSeconds.Observe( 1776 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1777 ) 1778 } 1779 } 1780 1781 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1782 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1783 cs.metrics.BlockSizeBytes.Set(float64(block.Size())) 1784 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1785 } 1786 1787 //----------------------------------------------------------------------------- 1788 1789 func (cs *State) defaultSetProposal(proposal *types.Proposal) error { 1790 // Already have one 1791 // TODO: possibly catch double proposals 1792 if cs.Proposal != nil { 1793 return nil 1794 } 1795 1796 // Does not apply 1797 if proposal.Height != cs.Height || proposal.Round != cs.Round { 1798 return nil 1799 } 1800 1801 // Verify POLRound, which must be -1 or in range [0, proposal.Round). 
if proposal.POLRound < -1 ||
		(proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) {
		return ErrInvalidProposalPOLRound
	}

	p := proposal.ToProto()
	// Verify signature
	if !cs.Validators.GetProposer().PubKey.VerifySignature(
		types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature,
	) {
		return ErrInvalidProposalSignature
	}

	proposal.Signature = p.Signature
	cs.Proposal = proposal
	// We don't update cs.ProposalBlockParts if it is already set.
	// This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round.
	// TODO: We can check if Proposal is for a different block as this is a sign of misbehavior!
	if cs.ProposalBlockParts == nil {
		cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader)
	}

	cs.Logger.Info("received proposal", "proposal", proposal)
	return nil
}

// addProposalBlockPart adds a block part received from peerID to
// cs.ProposalBlockParts.
//
// The part is dropped (added=false, err=nil) when its height differs from
// cs.Height or when no part set is currently expected. An error is returned
// when the part cannot be added, when the accumulated parts exceed the
// consensus MaxBytes limit, or when the completed part set cannot be decoded
// into a block.
//
// Once the part set is complete the block is decoded into cs.ProposalBlock,
// a CompleteProposal event is published, Valid* is updated if the current
// round already has a +2/3 prevote majority for this block, and the state
// machine is advanced: enterPrevote (and possibly enterPrecommit) if we are
// still at or before the propose step, or tryFinalizeCommit if we are in the
// commit step.
//
// NOTE: block is not necessarily valid.
func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (added bool, err error) {
	height, round, part := msg.Height, msg.Round, msg.Part

	// Blocks might be reused, so round mismatch is OK
	if cs.Height != height {
		cs.Logger.Debug("received block part from wrong height", "height", height, "round", round)
		return false, nil
	}

	// We're not expecting a block part.
	if cs.ProposalBlockParts == nil {
		// NOTE: this can happen when we've gone to a higher round and
		// then receive parts from the previous round - not necessarily a bad peer.
		cs.Logger.Debug(
			"received a block part when we are not expecting any",
			"height", height,
			"round", round,
			"index", part.Index,
			"peer", peerID,
		)
		return false, nil
	}

	added, err = cs.ProposalBlockParts.AddPart(part)
	if err != nil {
		return added, err
	}
	// Reject the part set as soon as its accumulated size exceeds the
	// maximum block size from the consensus params.
	if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes {
		return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)",
			cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes,
		)
	}
	if added && cs.ProposalBlockParts.IsComplete() {
		// All parts are present: reassemble the bytes and decode the block.
		// NOTE: err is shadowed inside this branch; every exit returns explicitly.
		bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader())
		if err != nil {
			return added, err
		}

		var pbb = new(tmproto.Block)
		err = proto.Unmarshal(bz, pbb)
		if err != nil {
			return added, err
		}

		block, err := types.BlockFromProto(pbb)
		if err != nil {
			return added, err
		}

		cs.ProposalBlock = block

		// NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal
		cs.Logger.Info("received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash())

		// A failure to publish the event is logged but does not abort processing.
		if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil {
			cs.Logger.Error("failed publishing event complete proposal", "err", err)
		}

		// Update Valid* if we can.
		prevotes := cs.Votes.Prevotes(cs.Round)
		blockID, hasTwoThirds := prevotes.TwoThirdsMajority()
		if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) {
			if cs.ProposalBlock.HashesTo(blockID.Hash) {
				cs.Logger.Debug(
					"updating valid block to new proposal block",
					"valid_round", cs.Round,
					"valid_block_hash", cs.ProposalBlock.Hash(),
				)

				cs.ValidRound = cs.Round
				cs.ValidBlock = cs.ProposalBlock
				cs.ValidBlockParts = cs.ProposalBlockParts
			}
			// TODO: In case there is +2/3 majority in Prevotes set for some
			// block and cs.ProposalBlock contains different block, either
			// proposer is faulty or voting power of faulty processes is more
			// than 1/3. We should trigger in the future accountability
			// procedure at this point.
		}

		if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() {
			// Move onto the next step
			cs.enterPrevote(height, cs.Round)
			if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added
				cs.enterPrecommit(height, cs.Round)
			}
		} else if cs.Step == cstypes.RoundStepCommit {
			// If we're waiting on the proposal block...
			cs.tryFinalizeCommit(height)
		}

		return added, nil
	}

	return added, nil
}

// tryAddVote attempts to add the vote via addVote and classifies any error:
//
//   - *types.ErrVoteConflictingVotes: if the vote carries our own validator
//     address an error is logged and the original error returned; otherwise
//     both votes are reported to the evidence pool and the original error is
//     returned. Returns errPubKeyIsNotSet if our public key is not known.
//   - types.ErrVoteNonDeterministicSignature: logged at debug level and
//     swallowed (the function returns a nil error).
//   - anything else: logged and replaced by ErrAddingVote.
func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) {
	added, err := cs.addVote(vote, peerID)
	if err != nil {
		// If the vote height is off, we'll just ignore it,
		// But if it's a conflicting sig, add it to the cs.evpool.
		// Any other error is logged and mapped to ErrAddingVote.
		// nolint: gocritic
		if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok {
			if cs.privValidatorPubKey == nil {
				return false, errPubKeyIsNotSet
			}

			// A conflicting vote signed with our own address is not reported
			// as evidence; it is only logged.
			if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) {
				cs.Logger.Error(
					"found conflicting vote from ourselves; did you unsafe_reset a validator?",
					"height", vote.Height,
					"round", vote.Round,
					"type", vote.Type,
				)

				return added, err
			}

			// report conflicting votes to the evidence pool
			cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB)
			cs.Logger.Debug(
				"found and sent conflicting votes to the evidence pool",
				"vote_a", voteErr.VoteA,
				"vote_b", voteErr.VoteB,
			)

			return added, err
		} else if errors.Is(err, types.ErrVoteNonDeterministicSignature) {
			// Falls through to the final return, i.e. no error is reported.
			cs.Logger.Debug("vote has non-deterministic signature", "err", err)
		} else {
			// Either
			// 1) bad peer OR
			// 2) not a bad peer? this can also err sometimes with "Unexpected step" OR
			// 3) tmkms use with multiple validators connecting to a single tmkms instance
			//		(https://github.com/tendermint/tendermint/issues/3839).
			cs.Logger.Info("failed attempting to add vote", "err", err)
			return added, ErrAddingVote
		}
	}

	return added, nil
}

// addVote adds the vote to the appropriate vote set and advances the state
// machine according to the majorities the vote completes.
//
// Three cases are handled:
//
//  1. A precommit for the previous height (vote.Height+1 == cs.Height) is
//     added to cs.LastCommit, but only while we are in RoundStepNewHeight.
//  2. A vote for any other height than cs.Height is ignored.
//  3. A vote for the current height is added to cs.Votes; prevotes may
//     unlock, update Valid*, and trigger round/step transitions, precommits
//     may trigger precommit, commit or new-round transitions.
//
// For every added vote an EventVote is published on the event bus and fired
// on the event switch. Panics on a vote type other than prevote or precommit.
func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) {
	cs.Logger.Debug(
		"adding vote",
		"vote_height", vote.Height,
		"vote_type", vote.Type,
		"val_index", vote.ValidatorIndex,
		"cs_height", cs.Height,
	)

	// A precommit for the previous height?
	// These come in while we wait timeoutCommit
	if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType {
		if cs.Step != cstypes.RoundStepNewHeight {
			// Late precommit at prior height is ignored
			cs.Logger.Debug("precommit vote came in after commit timeout and has been ignored", "vote", vote)
			return
		}

		added, err = cs.LastCommit.AddVote(vote)
		if !added {
			// Naked return: propagates whatever err AddVote produced.
			return
		}

		cs.Logger.Debug("added vote to last precommits", "last_commit", cs.LastCommit.StringShort())
		if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil {
			return added, err
		}

		cs.evsw.FireEvent(types.EventVote, vote)

		// if we can skip timeoutCommit and have all the votes now,
		if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() {
			// go straight to new round (skip timeout commit)
			// cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight)
			cs.enterNewRound(cs.Height, 0)
		}

		return
	}

	// Height mismatch is ignored.
	// Not necessarily a bad peer, but not favourable behaviour.
	if vote.Height != cs.Height {
		cs.Logger.Debug("vote ignored and not added", "vote_height", vote.Height, "cs_height", cs.Height, "peer", peerID)
		return
	}

	// Capture the height before any transition below can change cs.Height.
	height := cs.Height
	added, err = cs.Votes.AddVote(vote, peerID)
	if !added {
		// Either duplicate, or error upon cs.Votes.AddByIndex()
		return
	}

	if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil {
		return added, err
	}
	cs.evsw.FireEvent(types.EventVote, vote)

	switch vote.Type {
	case tmproto.PrevoteType:
		prevotes := cs.Votes.Prevotes(vote.Round)
		cs.Logger.Debug("added vote to prevote", "vote", vote, "prevotes", prevotes.StringShort())

		// If +2/3 prevotes for a block or nil for *any* round:
		if blockID, ok := prevotes.TwoThirdsMajority(); ok {
			// There was a polka!
			// If we're locked but this is a recent polka, unlock.
			// If it matches our ProposalBlock, update the ValidBlock

			// Unlock if `cs.LockedRound < vote.Round <= cs.Round`
			// NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round
			if (cs.LockedBlock != nil) &&
				(cs.LockedRound < vote.Round) &&
				(vote.Round <= cs.Round) &&
				!cs.LockedBlock.HashesTo(blockID.Hash) {

				cs.Logger.Debug("unlocking because of POL", "locked_round", cs.LockedRound, "pol_round", vote.Round)

				cs.LockedRound = -1
				cs.LockedBlock = nil
				cs.LockedBlockParts = nil

				if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil {
					return added, err
				}
			}

			// Update Valid* if we can.
			// NOTE: our proposal block may be nil or not what received a polka..
			if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) {
				if cs.ProposalBlock.HashesTo(blockID.Hash) {
					cs.Logger.Debug("updating valid block because of POL", "valid_round", cs.ValidRound, "pol_round", vote.Round)
					cs.ValidRound = vote.Round
					cs.ValidBlock = cs.ProposalBlock
					cs.ValidBlockParts = cs.ProposalBlockParts
				} else {
					cs.Logger.Debug(
						"valid block we do not know about; set ProposalBlock=nil",
						"proposal", cs.ProposalBlock.Hash(),
						"block_id", blockID.Hash,
					)

					// we're getting the wrong block
					cs.ProposalBlock = nil
				}

				// Start collecting parts for the polka block if the current
				// part set does not match its part-set header.
				if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) {
					cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader)
				}

				cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState)
				if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil {
					return added, err
				}
			}
		}

		// If +2/3 prevotes for *anything* for future round:
		switch {
		case cs.Round < vote.Round && prevotes.HasTwoThirdsAny():
			// Round-skip if there is any 2/3+ of votes ahead of us
			cs.enterNewRound(height, vote.Round)

		case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round
			blockID, ok := prevotes.TwoThirdsMajority()
			if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) {
				cs.enterPrecommit(height, vote.Round)
			} else if prevotes.HasTwoThirdsAny() {
				cs.enterPrevoteWait(height, vote.Round)
			}

		case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round:
			// If the proposal is now complete, enter prevote of cs.Round.
			if cs.isProposalComplete() {
				cs.enterPrevote(height, cs.Round)
			}
		}

	case tmproto.PrecommitType:
		precommits := cs.Votes.Precommits(vote.Round)
		cs.Logger.Debug("added vote to precommit",
			"height", vote.Height,
			"round", vote.Round,
			"validator", vote.ValidatorAddress.String(),
			"vote_timestamp", vote.Timestamp,
			"data", precommits.LogString())

		blockID, ok := precommits.TwoThirdsMajority()
		if ok {
			// Executed as TwoThirdsMajority could be from a higher round
			cs.enterNewRound(height, vote.Round)
			cs.enterPrecommit(height, vote.Round)

			// A non-empty hash means +2/3 precommitted a block; an empty hash
			// means +2/3 precommitted nil.
			if len(blockID.Hash) != 0 {
				cs.enterCommit(height, vote.Round)
				if cs.config.SkipTimeoutCommit && precommits.HasAll() {
					cs.enterNewRound(cs.Height, 0)
				}
			} else {
				cs.enterPrecommitWait(height, vote.Round)
			}
		} else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() {
			cs.enterNewRound(height, vote.Round)
			cs.enterPrecommitWait(height, vote.Round)
		}

	default:
		panic(fmt.Sprintf("unexpected vote type %v", vote.Type))
	}

	return added, err
}

// signVote builds a vote of the given type for the current height and round,
// voting for the block identified by hash and header, and asks the private
// validator to sign it. The signature and timestamp produced by the signer
// are copied back onto the returned vote.
//
// The vote is returned together with the signer's error, so callers must
// check err before using the vote. Returns errPubKeyIsNotSet if the
// validator public key has not been fetched yet.
//
// CONTRACT: cs.privValidator is not nil.
func (cs *State) signVote(
	msgType tmproto.SignedMsgType,
	hash []byte,
	header types.PartSetHeader,
) (*types.Vote, error) {
	// Flush the WAL. Otherwise, we may not recompute the same vote to sign,
	// and the privValidator will refuse to sign anything.
	if err := cs.wal.FlushAndSync(); err != nil {
		return nil, err
	}

	if cs.privValidatorPubKey == nil {
		return nil, errPubKeyIsNotSet
	}

	addr := cs.privValidatorPubKey.Address()
	valIdx, _ := cs.Validators.GetByAddress(addr)

	vote := &types.Vote{
		ValidatorAddress: addr,
		ValidatorIndex:   valIdx,
		Height:           cs.Height,
		Round:            cs.Round,
		Timestamp:        cs.voteTime(),
		Type:             msgType,
		BlockID:          types.BlockID{Hash: hash, PartSetHeader: header},
	}

	// Signing happens on the protobuf form; copy the results back.
	v := vote.ToProto()
	err := cs.privValidator.SignVote(cs.state.ChainID, v)
	vote.Signature = v.Signature
	vote.Timestamp = v.Timestamp

	return vote, err
}

// voteTime returns the timestamp to put on a new vote: the current time, or,
// if later, the time of the locked block (or else the proposal block) plus
// the TimeIotaMs consensus parameter.
func (cs *State) voteTime() time.Time {
	now := tmtime.Now()
	minVoteTime := now
	// TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil,
	// even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/.
	timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond
	if cs.LockedBlock != nil {
		// See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html
		minVoteTime = cs.LockedBlock.Time.Add(timeIota)
	} else if cs.ProposalBlock != nil {
		minVoteTime = cs.ProposalBlock.Time.Add(timeIota)
	}

	if now.After(minVoteTime) {
		return now
	}
	return minVoteTime
}

// signAddVote signs a vote of the given type and pushes it onto the internal
// message queue. It returns the signed vote, or nil when nothing was sent:
// the node has no private validator, its public key is not set, it is not in
// the current validator set, or signing failed (the failure is logged).
func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote {
	if cs.privValidator == nil { // the node does not have a key
		return nil
	}

	if cs.privValidatorPubKey == nil {
		// Vote won't be signed, but it's not critical.
		cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet))
		return nil
	}

	// If the node not in the validator set, do nothing.
	if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) {
		return nil
	}

	// TODO: pass pubKey to signVote
	vote, err := cs.signVote(msgType, hash, header)
	if err == nil {
		// An empty peer ID marks the message as internally generated.
		cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""})
		cs.Logger.Debug("signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote)
		return vote
	}

	cs.Logger.Error("failed signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err)
	return nil
}

// updatePrivValidatorPubKey gets the private validator public key and
// memoizes it in cs.privValidatorPubKey. It is a no-op returning nil when no
// private validator is configured, and returns the error from GetPubKey when
// the key cannot be obtained.
func (cs *State) updatePrivValidatorPubKey() error {
	if cs.privValidator == nil {
		return nil
	}

	pubKey, err := cs.privValidator.GetPubKey()
	if err != nil {
		return err
	}
	cs.privValidatorPubKey = pubKey
	return nil
}

// checkDoubleSigningRisk looks back through the seen commits of recent
// heights for a commit signature from this node's validator address and
// returns ErrSignatureFoundInPastBlocks if one is found.
//
// The check only runs when a private validator and its public key are set,
// cs.config.DoubleSignCheckHeight > 0 and height > 0. The look-back window is
// DoubleSignCheckHeight capped at height; the heights inspected are
// height-1 down to height-(window-1).
func (cs *State) checkDoubleSigningRisk(height int64) error {
	if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 {
		valAddr := cs.privValidatorPubKey.Address()
		doubleSignCheckHeight := cs.config.DoubleSignCheckHeight
		if doubleSignCheckHeight > height {
			doubleSignCheckHeight = height
		}

		for i := int64(1); i < doubleSignCheckHeight; i++ {
			lastCommit := cs.blockStore.LoadSeenCommit(height - i)
			// Heights without a stored seen commit are skipped.
			if lastCommit != nil {
				for sigIdx, s := range lastCommit.Signatures {
					if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) {
						cs.Logger.Info("found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i)
						return ErrSignatureFoundInPastBlocks
					}
				}
			}
		}
	}

	return nil
}
2276 2277 //--------------------------------------------------------- 2278 2279 func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 2280 if h1 < h2 { 2281 return -1 2282 } else if h1 > h2 { 2283 return 1 2284 } 2285 if r1 < r2 { 2286 return -1 2287 } else if r1 > r2 { 2288 return 1 2289 } 2290 if s1 < s2 { 2291 return -1 2292 } else if s1 > s2 { 2293 return 1 2294 } 2295 return 0 2296 } 2297 2298 // repairWalFile decodes messages from src (until the decoder errors) and 2299 // writes them to dst. 2300 func repairWalFile(src, dst string) error { 2301 in, err := os.Open(src) 2302 if err != nil { 2303 return err 2304 } 2305 defer in.Close() 2306 2307 out, err := os.Create(dst) 2308 if err != nil { 2309 return err 2310 } 2311 defer out.Close() 2312 2313 var ( 2314 dec = NewWALDecoder(in) 2315 enc = NewWALEncoder(out) 2316 ) 2317 2318 // best-case repair (until first error is encountered) 2319 for { 2320 msg, err := dec.Decode() 2321 if err != nil { 2322 break 2323 } 2324 2325 err = enc.Encode(msg) 2326 if err != nil { 2327 return fmt.Errorf("failed to encode msg: %w", err) 2328 } 2329 } 2330 2331 return nil 2332 }