github.com/516108736/tendermint@v0.36.0/consensus/state.go (about) 1 package consensus 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io/ioutil" 8 "os" 9 "runtime/debug" 10 "time" 11 12 "github.com/gogo/protobuf/proto" 13 14 cfg "github.com/tendermint/tendermint/config" 15 cstypes "github.com/tendermint/tendermint/consensus/types" 16 "github.com/tendermint/tendermint/crypto" 17 tmevents "github.com/tendermint/tendermint/libs/events" 18 "github.com/tendermint/tendermint/libs/fail" 19 tmjson "github.com/tendermint/tendermint/libs/json" 20 "github.com/tendermint/tendermint/libs/log" 21 tmmath "github.com/tendermint/tendermint/libs/math" 22 tmos "github.com/tendermint/tendermint/libs/os" 23 "github.com/tendermint/tendermint/libs/service" 24 tmsync "github.com/tendermint/tendermint/libs/sync" 25 "github.com/tendermint/tendermint/p2p" 26 tmproto "github.com/tendermint/tendermint/proto/tendermint/types" 27 sm "github.com/tendermint/tendermint/state" 28 "github.com/tendermint/tendermint/types" 29 tmtime "github.com/tendermint/tendermint/types/time" 30 ) 31 32 // Consensus sentinel errors 33 var ( 34 ErrInvalidProposalSignature = errors.New("error invalid proposal signature") 35 ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round") 36 ErrAddingVote = errors.New("error adding vote") 37 ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key") 38 39 errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors") 40 ) 41 42 var msgQueueSize = 1000 43 44 // msgs from the reactor which may update the state 45 type msgInfo struct { 46 Msg Message `json:"msg"` 47 PeerID p2p.ID `json:"peer_key"` 48 } 49 50 // internally generated messages which may update the state 51 type timeoutInfo struct { 52 Duration time.Duration `json:"duration"` 53 Height int64 `json:"height"` 54 Round int32 `json:"round"` 55 Step cstypes.RoundStepType `json:"step"` 56 } 57 58 func (ti *timeoutInfo) String() string { 59 return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step) 60 } 61 62 // interface to the mempool 63 type txNotifier interface { 64 TxsAvailable() <-chan struct{} 65 } 66 67 // interface to the evidence pool 68 type evidencePool interface { 69 // reports conflicting votes to the evidence pool to be processed into evidence 70 ReportConflictingVotes(voteA, voteB *types.Vote) 71 } 72 73 // State handles execution of the consensus algorithm. 74 // It processes votes and proposals, and upon reaching agreement, 75 // commits blocks to the chain and executes them against the application. 76 // The internal state machine receives input from peers, the internal validator, and from a timer. 77 type State struct { 78 service.BaseService 79 80 // config details 81 config *cfg.ConsensusConfig 82 privValidator types.PrivValidator // for signing votes 83 84 // store blocks and commits 85 blockStore sm.BlockStore 86 87 // create and execute blocks 88 blockExec *sm.BlockExecutor 89 90 // notify us if txs are available 91 txNotifier txNotifier 92 93 // add evidence to the pool 94 // when it's detected 95 evpool evidencePool 96 97 // internal state 98 mtx tmsync.RWMutex 99 cstypes.RoundState 100 state sm.State // State until height-1. 101 // privValidator pubkey, memoized for the duration of one block 102 // to avoid extra requests to HSM 103 privValidatorPubKey crypto.PubKey 104 105 // state changes may be triggered by: msgs from peers, 106 // msgs from ourself, or by timeouts 107 peerMsgQueue chan msgInfo 108 internalMsgQueue chan msgInfo 109 timeoutTicker TimeoutTicker 110 111 // information about about added votes and block parts are written on this channel 112 // so statistics can be computed by reactor 113 statsMsgQueue chan msgInfo 114 115 // we use eventBus to trigger msg broadcasts in the reactor, 116 // and to notify external subscribers, eg. through a websocket 117 eventBus *types.EventBus 118 119 // a Write-Ahead Log ensures we can recover from any kind of crash 120 // and helps us avoid signing conflicting votes 121 wal WAL 122 replayMode bool // so we don't log signing errors during replay 123 doWALCatchup bool // determines if we even try to do the catchup 124 125 // for tests where we want to limit the number of transitions the state makes 126 nSteps int 127 128 // some functions can be overwritten for testing 129 decideProposal func(height int64, round int32) 130 doPrevote func(height int64, round int32) 131 setProposal func(proposal *types.Proposal) error 132 133 // closed when we finish shutting down 134 done chan struct{} 135 136 // synchronous pubsub between consensus state and reactor. 137 // state only emits EventNewRoundStep and EventVote 138 evsw tmevents.EventSwitch 139 140 // for reporting metrics 141 metrics *Metrics 142 } 143 144 // StateOption sets an optional parameter on the State. 145 type StateOption func(*State) 146 147 // NewState returns a new State. 148 func NewState( 149 config *cfg.ConsensusConfig, 150 state sm.State, 151 blockExec *sm.BlockExecutor, 152 blockStore sm.BlockStore, 153 txNotifier txNotifier, 154 evpool evidencePool, 155 options ...StateOption, 156 ) *State { 157 cs := &State{ 158 config: config, 159 blockExec: blockExec, 160 blockStore: blockStore, 161 txNotifier: txNotifier, 162 peerMsgQueue: make(chan msgInfo, msgQueueSize), 163 internalMsgQueue: make(chan msgInfo, msgQueueSize), 164 timeoutTicker: NewTimeoutTicker(), 165 statsMsgQueue: make(chan msgInfo, msgQueueSize), 166 done: make(chan struct{}), 167 doWALCatchup: true, 168 wal: nilWAL{}, 169 evpool: evpool, 170 evsw: tmevents.NewEventSwitch(), 171 metrics: NopMetrics(), 172 } 173 174 // set function defaults (may be overwritten before calling Start) 175 cs.decideProposal = cs.defaultDecideProposal 176 cs.doPrevote = cs.defaultDoPrevote 177 cs.setProposal = cs.defaultSetProposal 178 179 // We have no votes, so reconstruct LastCommit from SeenCommit. 180 if state.LastBlockHeight > 0 { 181 cs.reconstructLastCommit(state) 182 } 183 184 cs.updateToState(state) 185 186 // NOTE: we do not call scheduleRound0 yet, we do that upon Start() 187 188 cs.BaseService = *service.NewBaseService(nil, "State", cs) 189 for _, option := range options { 190 option(cs) 191 } 192 193 return cs 194 } 195 196 // SetLogger implements Service. 197 func (cs *State) SetLogger(l log.Logger) { 198 cs.BaseService.Logger = l 199 cs.timeoutTicker.SetLogger(l) 200 } 201 202 // SetEventBus sets event bus. 203 func (cs *State) SetEventBus(b *types.EventBus) { 204 cs.eventBus = b 205 cs.blockExec.SetEventBus(b) 206 } 207 208 // StateMetrics sets the metrics. 209 func StateMetrics(metrics *Metrics) StateOption { 210 return func(cs *State) { cs.metrics = metrics } 211 } 212 213 // String returns a string. 214 func (cs *State) String() string { 215 // better not to access shared variables 216 return "ConsensusState" 217 } 218 219 // GetState returns a copy of the chain state. 220 func (cs *State) GetState() sm.State { 221 cs.mtx.RLock() 222 defer cs.mtx.RUnlock() 223 return cs.state.Copy() 224 } 225 226 // GetLastHeight returns the last height committed. 227 // If there were no blocks, returns 0. 228 func (cs *State) GetLastHeight() int64 { 229 cs.mtx.RLock() 230 defer cs.mtx.RUnlock() 231 return cs.RoundState.Height - 1 232 } 233 234 // GetRoundState returns a shallow copy of the internal consensus state. 235 func (cs *State) GetRoundState() *cstypes.RoundState { 236 cs.mtx.RLock() 237 rs := cs.RoundState // copy 238 cs.mtx.RUnlock() 239 return &rs 240 } 241 242 // GetRoundStateJSON returns a json of RoundState. 243 func (cs *State) GetRoundStateJSON() ([]byte, error) { 244 cs.mtx.RLock() 245 defer cs.mtx.RUnlock() 246 return tmjson.Marshal(cs.RoundState) 247 } 248 249 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 250 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 251 cs.mtx.RLock() 252 defer cs.mtx.RUnlock() 253 return tmjson.Marshal(cs.RoundState.RoundStateSimple()) 254 } 255 256 // GetValidators returns a copy of the current validators. 257 func (cs *State) GetValidators() (int64, []*types.Validator) { 258 cs.mtx.RLock() 259 defer cs.mtx.RUnlock() 260 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 261 } 262 263 // SetPrivValidator sets the private validator account for signing votes. It 264 // immediately requests pubkey and caches it. 265 func (cs *State) SetPrivValidator(priv types.PrivValidator) { 266 cs.mtx.Lock() 267 defer cs.mtx.Unlock() 268 269 cs.privValidator = priv 270 271 if err := cs.updatePrivValidatorPubKey(); err != nil { 272 cs.Logger.Error("failed to get private validator pubkey", "err", err) 273 } 274 } 275 276 // SetTimeoutTicker sets the local timer. It may be useful to overwrite for 277 // testing. 278 func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { 279 cs.mtx.Lock() 280 cs.timeoutTicker = timeoutTicker 281 cs.mtx.Unlock() 282 } 283 284 // LoadCommit loads the commit for a given height. 285 func (cs *State) LoadCommit(height int64) *types.Commit { 286 cs.mtx.RLock() 287 defer cs.mtx.RUnlock() 288 289 if height == cs.blockStore.Height() { 290 return cs.blockStore.LoadSeenCommit(height) 291 } 292 293 return cs.blockStore.LoadBlockCommit(height) 294 } 295 296 // OnStart loads the latest state via the WAL, and starts the timeout and 297 // receive routines. 298 func (cs *State) OnStart() error { 299 // We may set the WAL in testing before calling Start, so only OpenWAL if its 300 // still the nilWAL. 301 if _, ok := cs.wal.(nilWAL); ok { 302 if err := cs.loadWalFile(); err != nil { 303 return err 304 } 305 } 306 307 // We may have lost some votes if the process crashed reload from consensus 308 // log to catchup. 309 if cs.doWALCatchup { 310 repairAttempted := false 311 312 LOOP: 313 for { 314 err := cs.catchupReplay(cs.Height) 315 switch { 316 case err == nil: 317 break LOOP 318 319 case !IsDataCorruptionError(err): 320 cs.Logger.Error("error on catchup replay; proceeding to start state anyway", "err", err) 321 break LOOP 322 323 case repairAttempted: 324 return err 325 } 326 327 cs.Logger.Error("the WAL file is corrupted; attempting repair", "err", err) 328 329 // 1) prep work 330 if err := cs.wal.Stop(); err != nil { 331 return err 332 } 333 334 repairAttempted = true 335 336 // 2) backup original WAL file 337 corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) 338 if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { 339 return err 340 } 341 342 cs.Logger.Debug("backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) 343 344 // 3) try to repair (WAL file will be overwritten!) 345 if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { 346 cs.Logger.Error("the WAL repair failed", "err", err) 347 return err 348 } 349 350 cs.Logger.Info("successful WAL repair") 351 352 // reload WAL file 353 if err := cs.loadWalFile(); err != nil { 354 return err 355 } 356 } 357 } 358 359 if err := cs.evsw.Start(); err != nil { 360 return err 361 } 362 363 // we need the timeoutRoutine for replay so 364 // we don't block on the tick chan. 365 // NOTE: we will get a build up of garbage go routines 366 // firing on the tockChan until the receiveRoutine is started 367 // to deal with them (by that point, at most one will be valid) 368 if err := cs.timeoutTicker.Start(); err != nil { 369 return err 370 } 371 372 // Double Signing Risk Reduction 373 if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { 374 return err 375 } 376 377 // now start the receiveRoutine 378 go cs.receiveRoutine(0) 379 380 // schedule the first round! 381 // use GetRoundState so we don't race the receiveRoutine for access 382 cs.scheduleRound0(cs.GetRoundState()) 383 384 return nil 385 } 386 387 // timeoutRoutine: receive requests for timeouts on tickChan and fire timeouts on tockChan 388 // receiveRoutine: serializes processing of proposoals, block parts, votes; coordinates state transitions 389 func (cs *State) startRoutines(maxSteps int) { 390 err := cs.timeoutTicker.Start() 391 if err != nil { 392 cs.Logger.Error("failed to start timeout ticker", "err", err) 393 return 394 } 395 396 go cs.receiveRoutine(maxSteps) 397 } 398 399 // loadWalFile loads WAL data from file. It overwrites cs.wal. 400 func (cs *State) loadWalFile() error { 401 wal, err := cs.OpenWAL(cs.config.WalFile()) 402 if err != nil { 403 cs.Logger.Error("failed to load state WAL", "err", err) 404 return err 405 } 406 407 cs.wal = wal 408 return nil 409 } 410 411 // OnStop implements service.Service. 412 func (cs *State) OnStop() { 413 if err := cs.evsw.Stop(); err != nil { 414 cs.Logger.Error("failed trying to stop eventSwitch", "error", err) 415 } 416 417 if err := cs.timeoutTicker.Stop(); err != nil { 418 cs.Logger.Error("failed trying to stop timeoutTicket", "error", err) 419 } 420 // WAL is stopped in receiveRoutine. 421 } 422 423 // Wait waits for the the main routine to return. 424 // NOTE: be sure to Stop() the event switch and drain 425 // any event channels or this may deadlock 426 func (cs *State) Wait() { 427 <-cs.done 428 } 429 430 // OpenWAL opens a file to log all consensus messages and timeouts for 431 // deterministic accountability. 432 func (cs *State) OpenWAL(walFile string) (WAL, error) { 433 wal, err := NewWAL(walFile) 434 if err != nil { 435 cs.Logger.Error("failed to open WAL", "file", walFile, "err", err) 436 return nil, err 437 } 438 439 wal.SetLogger(cs.Logger.With("wal", walFile)) 440 441 if err := wal.Start(); err != nil { 442 cs.Logger.Error("failed to start WAL", "err", err) 443 return nil, err 444 } 445 446 return wal, nil 447 } 448 449 //------------------------------------------------------------ 450 // Public interface for passing messages into the consensus state, possibly causing a state transition. 451 // If peerID == "", the msg is considered internal. 452 // Messages are added to the appropriate queue (peer or internal). 453 // If the queue is full, the function may block. 454 // TODO: should these return anything or let callers just use events? 455 456 // AddVote inputs a vote. 457 func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 458 if peerID == "" { 459 cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""} 460 } else { 461 cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID} 462 } 463 464 // TODO: wait for event?! 465 return false, nil 466 } 467 468 // SetProposal inputs a proposal. 469 func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { 470 471 if peerID == "" { 472 cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""} 473 } else { 474 cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID} 475 } 476 477 // TODO: wait for event?! 478 return nil 479 } 480 481 // AddProposalBlockPart inputs a part of the proposal block. 482 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { 483 484 if peerID == "" { 485 cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""} 486 } else { 487 cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID} 488 } 489 490 // TODO: wait for event?! 491 return nil 492 } 493 494 // SetProposalAndBlock inputs the proposal and all block parts. 495 func (cs *State) SetProposalAndBlock( 496 proposal *types.Proposal, 497 block *types.Block, 498 parts *types.PartSet, 499 peerID p2p.ID, 500 ) error { 501 502 if err := cs.SetProposal(proposal, peerID); err != nil { 503 return err 504 } 505 506 for i := 0; i < int(parts.Total()); i++ { 507 part := parts.GetPart(i) 508 if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { 509 return err 510 } 511 } 512 513 return nil 514 } 515 516 //------------------------------------------------------------ 517 // internal functions for managing the state 518 519 func (cs *State) updateHeight(height int64) { 520 cs.metrics.Height.Set(float64(height)) 521 cs.Height = height 522 } 523 524 func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { 525 cs.Round = round 526 cs.Step = step 527 } 528 529 // enterNewRound(height, 0) at cs.StartTime. 530 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 531 // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) 532 sleepDuration := rs.StartTime.Sub(tmtime.Now()) 533 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 534 } 535 536 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 537 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { 538 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) 539 } 540 541 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 542 func (cs *State) sendInternalMessage(mi msgInfo) { 543 select { 544 case cs.internalMsgQueue <- mi: 545 default: 546 // NOTE: using the go-routine means our votes can 547 // be processed out of order. 548 // TODO: use CList here for strict determinism and 549 // attempt push to internalMsgQueue in receiveRoutine 550 cs.Logger.Debug("internal msg queue is full; using a go-routine") 551 go func() { cs.internalMsgQueue <- mi }() 552 } 553 } 554 555 // Reconstruct LastCommit from SeenCommit, which we saved along with the block, 556 // (which happens even before saving the state) 557 func (cs *State) reconstructLastCommit(state sm.State) { 558 seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) 559 if seenCommit == nil { 560 panic(fmt.Sprintf( 561 "failed to reconstruct last commit; seen commit for height %v not found", 562 state.LastBlockHeight, 563 )) 564 } 565 566 lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) 567 if !lastPrecommits.HasTwoThirdsMajority() { 568 panic("failed to reconstruct last commit; does not have +2/3 maj") 569 } 570 571 cs.LastCommit = lastPrecommits 572 } 573 574 // Updates State and increments height to match that of state. 575 // The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. 576 func (cs *State) updateToState(state sm.State) { 577 if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight { 578 panic(fmt.Sprintf( 579 "updateToState() expected state height of %v but found %v", 580 cs.Height, state.LastBlockHeight, 581 )) 582 } 583 584 if !cs.state.IsEmpty() { 585 if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height { 586 // This might happen when someone else is mutating cs.state. 587 // Someone forgot to pass in state.Copy() somewhere?! 588 panic(fmt.Sprintf( 589 "inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v", 590 cs.state.LastBlockHeight+1, cs.Height, 591 )) 592 } 593 if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight { 594 panic(fmt.Sprintf( 595 "inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v", 596 cs.state.LastBlockHeight, cs.state.InitialHeight, 597 )) 598 } 599 600 // If state isn't further out than cs.state, just ignore. 601 // This happens when SwitchToConsensus() is called in the reactor. 602 // We don't want to reset e.g. the Votes, but we still want to 603 // signal the new round step, because other services (eg. txNotifier) 604 // depend on having an up-to-date peer state! 605 if state.LastBlockHeight <= cs.state.LastBlockHeight { 606 cs.Logger.Debug( 607 "ignoring updateToState()", 608 "new_height", state.LastBlockHeight+1, 609 "old_height", cs.state.LastBlockHeight+1, 610 ) 611 cs.newStep() 612 return 613 } 614 } 615 616 // Reset fields based on state. 617 validators := state.Validators 618 619 switch { 620 case state.LastBlockHeight == 0: // Very first commit should be empty. 621 cs.LastCommit = (*types.VoteSet)(nil) 622 case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes 623 if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() { 624 panic(fmt.Sprintf( 625 "wanted to form a commit, but precommits (H/R: %d/%d) didn't have 2/3+: %v", 626 state.LastBlockHeight, cs.CommitRound, cs.Votes.Precommits(cs.CommitRound), 627 )) 628 } 629 630 cs.LastCommit = cs.Votes.Precommits(cs.CommitRound) 631 632 case cs.LastCommit == nil: 633 // NOTE: when Tendermint starts, it has no votes. reconstructLastCommit 634 // must be called to reconstruct LastCommit from SeenCommit. 635 panic(fmt.Sprintf( 636 "last commit cannot be empty after initial block (H:%d)", 637 state.LastBlockHeight+1, 638 )) 639 } 640 641 // Next desired block height 642 height := state.LastBlockHeight + 1 643 if height == 1 { 644 height = state.InitialHeight 645 } 646 647 // RoundState fields 648 cs.updateHeight(height) 649 cs.updateRoundStep(0, cstypes.RoundStepNewHeight) 650 651 if cs.CommitTime.IsZero() { 652 // "Now" makes it easier to sync up dev nodes. 653 // We add timeoutCommit to allow transactions 654 // to be gathered for the first block. 655 // And alternative solution that relies on clocks: 656 // cs.StartTime = state.LastBlockTime.Add(timeoutCommit) 657 cs.StartTime = cs.config.Commit(tmtime.Now()) 658 } else { 659 cs.StartTime = cs.config.Commit(cs.CommitTime) 660 } 661 662 cs.Validators = validators 663 cs.Proposal = nil 664 cs.ProposalBlock = nil 665 cs.ProposalBlockParts = nil 666 cs.LockedRound = -1 667 cs.LockedBlock = nil 668 cs.LockedBlockParts = nil 669 cs.ValidRound = -1 670 cs.ValidBlock = nil 671 cs.ValidBlockParts = nil 672 cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators) 673 cs.CommitRound = -1 674 cs.LastValidators = state.LastValidators 675 cs.TriggeredTimeoutPrecommit = false 676 677 cs.state = state 678 679 // Finally, broadcast RoundState 680 cs.newStep() 681 } 682 683 func (cs *State) newStep() { 684 rs := cs.RoundStateEvent() 685 if err := cs.wal.Write(rs); err != nil { 686 cs.Logger.Error("failed writing to WAL", "err", err) 687 } 688 689 cs.nSteps++ 690 691 // newStep is called by updateToState in NewState before the eventBus is set! 692 if cs.eventBus != nil { 693 if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil { 694 cs.Logger.Error("failed publishing new round step", "err", err) 695 } 696 697 cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState) 698 } 699 } 700 701 //----------------------------------------- 702 // the main go routines 703 704 // receiveRoutine handles messages which may cause state transitions. 705 // it's argument (n) is the number of messages to process before exiting - use 0 to run forever 706 // It keeps the RoundState and is the only thing that updates it. 707 // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. 708 // State must be locked before any internal state is updated. 709 func (cs *State) receiveRoutine(maxSteps int) { 710 onExit := func(cs *State) { 711 // NOTE: the internalMsgQueue may have signed messages from our 712 // priv_val that haven't hit the WAL, but its ok because 713 // priv_val tracks LastSig 714 715 // close wal now that we're done writing to it 716 if err := cs.wal.Stop(); err != nil { 717 cs.Logger.Error("failed trying to stop WAL", "error", err) 718 } 719 720 cs.wal.Wait() 721 close(cs.done) 722 } 723 724 defer func() { 725 if r := recover(); r != nil { 726 cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack())) 727 // stop gracefully 728 // 729 // NOTE: We most probably shouldn't be running any further when there is 730 // some unexpected panic. Some unknown error happened, and so we don't 731 // know if that will result in the validator signing an invalid thing. It 732 // might be worthwhile to explore a mechanism for manual resuming via 733 // some console or secure RPC system, but for now, halting the chain upon 734 // unexpected consensus bugs sounds like the better option. 735 onExit(cs) 736 } 737 }() 738 739 for { 740 if maxSteps > 0 { 741 if cs.nSteps >= maxSteps { 742 cs.Logger.Debug("reached max steps; exiting receive routine") 743 cs.nSteps = 0 744 return 745 } 746 } 747 748 rs := cs.RoundState 749 var mi msgInfo 750 751 select { 752 case <-cs.txNotifier.TxsAvailable(): 753 cs.handleTxsAvailable() 754 755 case mi = <-cs.peerMsgQueue: 756 if err := cs.wal.Write(mi); err != nil { 757 cs.Logger.Error("failed writing to WAL", "err", err) 758 } 759 760 // handles proposals, block parts, votes 761 // may generate internal events (votes, complete proposals, 2/3 majorities) 762 cs.handleMsg(mi) 763 764 case mi = <-cs.internalMsgQueue: 765 err := cs.wal.WriteSync(mi) // NOTE: fsync 766 if err != nil { 767 panic(fmt.Sprintf( 768 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 769 mi, err, 770 )) 771 } 772 773 if _, ok := mi.Msg.(*VoteMessage); ok { 774 // we actually want to simulate failing during 775 // the previous WriteSync, but this isn't easy to do. 776 // Equivalent would be to fail here and manually remove 777 // some bytes from the end of the wal. 778 fail.Fail() // XXX 779 } 780 781 // handles proposals, block parts, votes 782 cs.handleMsg(mi) 783 784 case ti := <-cs.timeoutTicker.Chan(): // tockChan: 785 if err := cs.wal.Write(ti); err != nil { 786 cs.Logger.Error("failed writing to WAL", "err", err) 787 } 788 789 // if the timeout is relevant to the rs 790 // go to the next step 791 cs.handleTimeout(ti, rs) 792 793 case <-cs.Quit(): 794 onExit(cs) 795 return 796 } 797 } 798 } 799 800 // state transitions on complete-proposal, 2/3-any, 2/3-one 801 func (cs *State) handleMsg(mi msgInfo) { 802 cs.mtx.Lock() 803 defer cs.mtx.Unlock() 804 805 var ( 806 added bool 807 err error 808 ) 809 810 msg, peerID := mi.Msg, mi.PeerID 811 812 switch msg := msg.(type) { 813 case *ProposalMessage: 814 // will not cause transition. 815 // once proposal is set, we can receive block parts 816 err = cs.setProposal(msg.Proposal) 817 818 case *BlockPartMessage: 819 // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit 820 added, err = cs.addProposalBlockPart(msg, peerID) 821 if added { 822 cs.statsMsgQueue <- mi 823 } 824 825 if err != nil && msg.Round != cs.Round { 826 cs.Logger.Debug( 827 "received block part from wrong round", 828 "height", cs.Height, 829 "cs_round", cs.Round, 830 "block_round", msg.Round, 831 ) 832 err = nil 833 } 834 835 case *VoteMessage: 836 // attempt to add the vote and dupeout the validator if its a duplicate signature 837 // if the vote gives us a 2/3-any or 2/3-one, we transition 838 added, err = cs.tryAddVote(msg.Vote, peerID) 839 if added { 840 cs.statsMsgQueue <- mi 841 } 842 843 // if err == ErrAddingVote { 844 // TODO: punish peer 845 // We probably don't want to stop the peer here. The vote does not 846 // necessarily comes from a malicious peer but can be just broadcasted by 847 // a typical peer. 848 // https://github.com/tendermint/tendermint/issues/1281 849 // } 850 851 // NOTE: the vote is broadcast to peers by the reactor listening 852 // for vote events 853 854 // TODO: If rs.Height == vote.Height && rs.Round < vote.Round, 855 // the peer is sending us CatchupCommit precommits. 856 // We could make note of this and help filter in broadcastHasVoteMessage(). 857 858 default: 859 cs.Logger.Error("unknown msg type", "type", fmt.Sprintf("%T", msg)) 860 return 861 } 862 863 if err != nil { 864 cs.Logger.Error( 865 "failed to process message", 866 "height", cs.Height, 867 "round", cs.Round, 868 "peer", peerID, 869 "err", err, 870 "msg", msg, 871 ) 872 } 873 } 874 875 func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) { 876 cs.Logger.Debug("received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) 877 878 // timeouts must be for current height, round, step 879 if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) { 880 cs.Logger.Debug("ignoring tock because we are ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step) 881 return 882 } 883 884 // the timeout will now cause a state transition 885 cs.mtx.Lock() 886 defer cs.mtx.Unlock() 887 888 switch ti.Step { 889 case cstypes.RoundStepNewHeight: 890 // NewRound event fired from enterNewRound. 891 // XXX: should we fire timeout here (for timeout commit)? 892 cs.enterNewRound(ti.Height, 0) 893 894 case cstypes.RoundStepNewRound: 895 cs.enterPropose(ti.Height, 0) 896 897 case cstypes.RoundStepPropose: 898 if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil { 899 cs.Logger.Error("failed publishing timeout propose", "err", err) 900 } 901 902 cs.enterPrevote(ti.Height, ti.Round) 903 904 case cstypes.RoundStepPrevoteWait: 905 if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { 906 cs.Logger.Error("failed publishing timeout wait", "err", err) 907 } 908 909 cs.enterPrecommit(ti.Height, ti.Round) 910 911 case cstypes.RoundStepPrecommitWait: 912 if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { 913 cs.Logger.Error("failed publishing timeout wait", "err", err) 914 } 915 916 cs.enterPrecommit(ti.Height, ti.Round) 917 cs.enterNewRound(ti.Height, ti.Round+1) 918 919 default: 920 panic(fmt.Sprintf("invalid timeout step: %v", ti.Step)) 921 } 922 923 } 924 925 func (cs *State) handleTxsAvailable() { 926 cs.mtx.Lock() 927 defer cs.mtx.Unlock() 928 929 // We only need to do this for round 0. 930 if cs.Round != 0 { 931 return 932 } 933 934 switch cs.Step { 935 case cstypes.RoundStepNewHeight: // timeoutCommit phase 936 if cs.needProofBlock(cs.Height) { 937 // enterPropose will be called by enterNewRound 938 return 939 } 940 941 // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight 942 timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond 943 cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) 944 945 case cstypes.RoundStepNewRound: // after timeoutCommit 946 cs.enterPropose(cs.Height, 0) 947 } 948 } 949 950 //----------------------------------------------------------------------------- 951 // State functions 952 // Used internally by handleTimeout and handleMsg to make state transitions 953 954 // Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit), 955 // or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1) 956 // Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1) 957 // Enter: +2/3 precommits for nil at (height,round-1) 958 // Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round) 959 // NOTE: cs.StartTime was already set for height. 960 func (cs *State) enterNewRound(height int64, round int32) { 961 logger := cs.Logger.With("height", height, "round", round) 962 963 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { 964 logger.Debug( 965 "entering new round with invalid args", 966 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 967 ) 968 return 969 } 970 971 if now := tmtime.Now(); cs.StartTime.After(now) { 972 logger.Debug("need to set a buffer and log message here for sanity", "start_time", cs.StartTime, "now", now) 973 } 974 975 logger.Debug("entering new round", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 976 977 // increment validators if necessary 978 validators := cs.Validators 979 if cs.Round < round { 980 validators = validators.Copy() 981 validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round)) 982 } 983 984 // Setup new round 985 // we don't fire newStep for this step, 986 // but we fire an event, so update the round step first 987 cs.updateRoundStep(round, cstypes.RoundStepNewRound) 988 cs.Validators = validators 989 if round == 0 { 990 // We've already reset these upon new height, 991 // and meanwhile we might have received a proposal 992 // for round 0. 993 } else { 994 logger.Debug("resetting proposal info") 995 cs.Proposal = nil 996 cs.ProposalBlock = nil 997 cs.ProposalBlockParts = nil 998 } 999 1000 cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping 1001 cs.TriggeredTimeoutPrecommit = false 1002 1003 if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { 1004 cs.Logger.Error("failed publishing new round", "err", err) 1005 } 1006 1007 cs.metrics.Rounds.Set(float64(round)) 1008 1009 // Wait for txs to be available in the mempool 1010 // before we enterPropose in round 0. If the last block changed the app hash, 1011 // we may need an empty "proof" block, and enterPropose immediately. 1012 waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) 1013 if waitForTxs { 1014 if cs.config.CreateEmptyBlocksInterval > 0 { 1015 cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, 1016 cstypes.RoundStepNewRound) 1017 } 1018 } else { 1019 cs.enterPropose(height, round) 1020 } 1021 } 1022 1023 // needProofBlock returns true on the first height (so the genesis app hash is signed right away) 1024 // and where the last block (height-1) caused the app hash to change 1025 func (cs *State) needProofBlock(height int64) bool { 1026 if height == cs.state.InitialHeight { 1027 return true 1028 } 1029 1030 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1031 if lastBlockMeta == nil { 1032 panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1)) 1033 } 1034 1035 return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) 1036 } 1037 1038 // Enter (CreateEmptyBlocks): from enterNewRound(height,round) 1039 // Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ): 1040 // after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval 1041 // Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool 1042 func (cs *State) enterPropose(height int64, round int32) { 1043 logger := cs.Logger.With("height", height, "round", round) 1044 1045 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) { 1046 logger.Debug( 1047 "entering propose step with invalid args", 1048 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1049 ) 1050 return 1051 } 1052 1053 logger.Debug("entering propose step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1054 1055 defer func() { 1056 // Done enterPropose: 1057 cs.updateRoundStep(round, cstypes.RoundStepPropose) 1058 cs.newStep() 1059 1060 // If we have the whole proposal + POL, then goto Prevote now. 1061 // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart), 1062 // or else after timeoutPropose 1063 if cs.isProposalComplete() { 1064 cs.enterPrevote(height, cs.Round) 1065 } 1066 }() 1067 1068 // If we don't get the proposal and all block parts quick enough, enterPrevote 1069 cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose) 1070 1071 // Nothing more to do if we're not a validator 1072 if cs.privValidator == nil { 1073 logger.Debug("node is not a validator") 1074 return 1075 } 1076 1077 logger.Debug("node is a validator") 1078 1079 if cs.privValidatorPubKey == nil { 1080 // If this node is a validator & proposer in the current round, it will 1081 // miss the opportunity to create a block. 1082 logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1083 return 1084 } 1085 1086 address := cs.privValidatorPubKey.Address() 1087 1088 // if not a validator, we're done 1089 if !cs.Validators.HasAddress(address) { 1090 logger.Debug("node is not a validator", "addr", address, "vals", cs.Validators) 1091 return 1092 } 1093 1094 if cs.isProposer(address) { 1095 logger.Debug("propose step; our turn to propose", "proposer", address) 1096 cs.decideProposal(height, round) 1097 } else { 1098 logger.Debug("propose step; not our turn to propose", "proposer", cs.Validators.GetProposer().Address) 1099 } 1100 } 1101 1102 func (cs *State) isProposer(address []byte) bool { 1103 return bytes.Equal(cs.Validators.GetProposer().Address, address) 1104 } 1105 1106 func (cs *State) defaultDecideProposal(height int64, round int32) { 1107 var block *types.Block 1108 var blockParts *types.PartSet 1109 1110 // Decide on block 1111 if cs.ValidBlock != nil { 1112 // If there is valid block, choose that. 1113 block, blockParts = cs.ValidBlock, cs.ValidBlockParts 1114 } else { 1115 // Create a new proposal block from state/txs from the mempool. 1116 block, blockParts = cs.createProposalBlock() 1117 if block == nil { 1118 return 1119 } 1120 } 1121 1122 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1123 // and the privValidator will refuse to sign anything. 1124 if err := cs.wal.FlushAndSync(); err != nil { 1125 cs.Logger.Error("failed flushing WAL to disk") 1126 } 1127 1128 // Make proposal 1129 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1130 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1131 p := proposal.ToProto() 1132 if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { 1133 proposal.Signature = p.Signature 1134 1135 // send proposal and block parts on internal msg queue 1136 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 1137 1138 for i := 0; i < int(blockParts.Total()); i++ { 1139 part := blockParts.GetPart(i) 1140 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 1141 } 1142 1143 cs.Logger.Info("signed proposal", "height", height, "round", round, "proposal", proposal) 1144 cs.Logger.Debug("signed proposal block", "block", block) 1145 } else if !cs.replayMode { 1146 cs.Logger.Error("propose step; failed signing proposal", "height", height, "round", round, "err", err) 1147 } 1148 } 1149 1150 // Returns true if the proposal block is complete && 1151 // (if POLRound was proposed, we have +2/3 prevotes from there). 1152 func (cs *State) isProposalComplete() bool { 1153 if cs.Proposal == nil || cs.ProposalBlock == nil { 1154 return false 1155 } 1156 // we have the proposal. if there's a POLRound, 1157 // make sure we have the prevotes from it too 1158 if cs.Proposal.POLRound < 0 { 1159 return true 1160 } 1161 // if this is false the proposer is lying or we haven't received the POL yet 1162 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1163 1164 } 1165 1166 // Create the next block to propose and return it. Returns nil block upon error. 1167 // 1168 // We really only need to return the parts, but the block is returned for 1169 // convenience so we can log the proposal block. 1170 // 1171 // NOTE: keep it side-effect free for clarity. 1172 // CONTRACT: cs.privValidator is not nil. 1173 func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { 1174 if cs.privValidator == nil { 1175 panic("entered createProposalBlock with privValidator being nil") 1176 } 1177 1178 var commit *types.Commit 1179 switch { 1180 case cs.Height == cs.state.InitialHeight: 1181 // We're creating a proposal for the first block. 1182 // The commit is empty, but not nil. 1183 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1184 1185 case cs.LastCommit.HasTwoThirdsMajority(): 1186 // Make the commit from LastCommit 1187 commit = cs.LastCommit.MakeCommit() 1188 1189 default: // This shouldn't happen. 1190 cs.Logger.Error("propose step; cannot propose anything without commit for the previous block") 1191 return 1192 } 1193 1194 if cs.privValidatorPubKey == nil { 1195 // If this node is a validator & proposer in the current round, it will 1196 // miss the opportunity to create a block. 1197 cs.Logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1198 return 1199 } 1200 1201 proposerAddr := cs.privValidatorPubKey.Address() 1202 1203 return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1204 } 1205 1206 // Enter: `timeoutPropose` after entering Propose. 1207 // Enter: proposal block and POL is ready. 1208 // Prevote for LockedBlock if we're locked, or ProposalBlock if valid. 1209 // Otherwise vote nil. 1210 func (cs *State) enterPrevote(height int64, round int32) { 1211 logger := cs.Logger.With("height", height, "round", round) 1212 1213 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 1214 logger.Debug( 1215 "entering prevote step with invalid args", 1216 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1217 ) 1218 return 1219 } 1220 1221 defer func() { 1222 // Done enterPrevote: 1223 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 1224 cs.newStep() 1225 }() 1226 1227 logger.Debug("entering prevote step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1228 1229 // Sign and broadcast vote as necessary 1230 cs.doPrevote(height, round) 1231 1232 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 1233 // (so we have more time to try and collect +2/3 prevotes for a single block) 1234 } 1235 1236 func (cs *State) defaultDoPrevote(height int64, round int32) { 1237 logger := cs.Logger.With("height", height, "round", round) 1238 1239 // If a block is locked, prevote that. 1240 if cs.LockedBlock != nil { 1241 logger.Debug("prevote step; already locked on a block; prevoting locked block") 1242 cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) 1243 return 1244 } 1245 1246 // If ProposalBlock is nil, prevote nil. 1247 if cs.ProposalBlock == nil { 1248 logger.Debug("prevote step: ProposalBlock is nil") 1249 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1250 return 1251 } 1252 1253 // Validate proposal block 1254 err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) 1255 if err != nil { 1256 // ProposalBlock is invalid, prevote nil. 1257 logger.Error("prevote step: ProposalBlock is invalid", "err", err) 1258 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1259 return 1260 } 1261 1262 // Prevote cs.ProposalBlock 1263 // NOTE: the proposal signature is validated when it is received, 1264 // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) 1265 logger.Debug("prevote step: ProposalBlock is valid") 1266 cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) 1267 } 1268 1269 // Enter: any +2/3 prevotes at next round. 1270 func (cs *State) enterPrevoteWait(height int64, round int32) { 1271 logger := cs.Logger.With("height", height, "round", round) 1272 1273 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1274 logger.Debug( 1275 "entering prevote wait step with invalid args", 1276 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1277 ) 1278 return 1279 } 1280 1281 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1282 panic(fmt.Sprintf( 1283 "entering prevote wait step (%v/%v), but prevotes does not have any +2/3 votes", 1284 height, round, 1285 )) 1286 } 1287 1288 logger.Debug("entering prevote wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1289 1290 defer func() { 1291 // Done enterPrevoteWait: 1292 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1293 cs.newStep() 1294 }() 1295 1296 // Wait for some more prevotes; enterPrecommit 1297 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1298 } 1299 1300 // Enter: `timeoutPrevote` after any +2/3 prevotes. 1301 // Enter: `timeoutPrecommit` after any +2/3 precommits. 1302 // Enter: +2/3 precomits for block or nil. 1303 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 1304 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 1305 // else, precommit nil otherwise. 1306 func (cs *State) enterPrecommit(height int64, round int32) { 1307 logger := cs.Logger.With("height", height, "round", round) 1308 1309 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { 1310 logger.Debug( 1311 "entering precommit step with invalid args", 1312 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1313 ) 1314 return 1315 } 1316 1317 logger.Debug("entering precommit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1318 1319 defer func() { 1320 // Done enterPrecommit: 1321 cs.updateRoundStep(round, cstypes.RoundStepPrecommit) 1322 cs.newStep() 1323 }() 1324 1325 // check for a polka 1326 blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() 1327 1328 // If we don't have a polka, we must precommit nil. 1329 if !ok { 1330 if cs.LockedBlock != nil { 1331 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit while we are locked; precommitting nil") 1332 } else { 1333 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit; precommitting nil") 1334 } 1335 1336 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1337 return 1338 } 1339 1340 // At this point +2/3 prevoted for a particular block or nil. 1341 if err := cs.eventBus.PublishEventPolka(cs.RoundStateEvent()); err != nil { 1342 logger.Error("failed publishing polka", "err", err) 1343 } 1344 1345 // the latest POLRound should be this round. 1346 polRound, _ := cs.Votes.POLInfo() 1347 if polRound < round { 1348 panic(fmt.Sprintf("this POLRound should be %v but got %v", round, polRound)) 1349 } 1350 1351 // +2/3 prevoted nil. Unlock and precommit nil. 1352 if len(blockID.Hash) == 0 { 1353 if cs.LockedBlock == nil { 1354 logger.Debug("precommit step; +2/3 prevoted for nil") 1355 } else { 1356 logger.Debug("precommit step; +2/3 prevoted for nil; unlocking") 1357 cs.LockedRound = -1 1358 cs.LockedBlock = nil 1359 cs.LockedBlockParts = nil 1360 1361 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1362 logger.Error("failed publishing event unlock", "err", err) 1363 } 1364 } 1365 1366 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1367 return 1368 } 1369 1370 // At this point, +2/3 prevoted for a particular block. 1371 1372 // If we're already locked on that block, precommit it, and update the LockedRound 1373 if cs.LockedBlock.HashesTo(blockID.Hash) { 1374 logger.Debug("precommit step; +2/3 prevoted locked block; relocking") 1375 cs.LockedRound = round 1376 1377 if err := cs.eventBus.PublishEventRelock(cs.RoundStateEvent()); err != nil { 1378 logger.Error("failed publishing event relock", "err", err) 1379 } 1380 1381 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1382 return 1383 } 1384 1385 // If +2/3 prevoted for proposal block, stage and precommit it 1386 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1387 logger.Debug("precommit step; +2/3 prevoted proposal block; locking", "hash", blockID.Hash) 1388 1389 // Validate the block. 1390 if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil { 1391 panic(fmt.Sprintf("precommit step; +2/3 prevoted for an invalid block: %v", err)) 1392 } 1393 1394 cs.LockedRound = round 1395 cs.LockedBlock = cs.ProposalBlock 1396 cs.LockedBlockParts = cs.ProposalBlockParts 1397 1398 if err := cs.eventBus.PublishEventLock(cs.RoundStateEvent()); err != nil { 1399 logger.Error("failed publishing event lock", "err", err) 1400 } 1401 1402 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1403 return 1404 } 1405 1406 // There was a polka in this round for a block we don't have. 1407 // Fetch that block, unlock, and precommit nil. 1408 // The +2/3 prevotes for this round is the POL for our unlock. 1409 logger.Debug("precommit step; +2/3 prevotes for a block we do not have; voting nil", "block_id", blockID) 1410 1411 cs.LockedRound = -1 1412 cs.LockedBlock = nil 1413 cs.LockedBlockParts = nil 1414 1415 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1416 cs.ProposalBlock = nil 1417 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1418 } 1419 1420 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1421 logger.Error("failed publishing event unlock", "err", err) 1422 } 1423 1424 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1425 } 1426 1427 // Enter: any +2/3 precommits for next round. 1428 func (cs *State) enterPrecommitWait(height int64, round int32) { 1429 logger := cs.Logger.With("height", height, "round", round) 1430 1431 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1432 logger.Debug( 1433 "entering precommit wait step with invalid args", 1434 "triggered_timeout", cs.TriggeredTimeoutPrecommit, 1435 "current", fmt.Sprintf("%v/%v", cs.Height, cs.Round), 1436 ) 1437 return 1438 } 1439 1440 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1441 panic(fmt.Sprintf( 1442 "entering precommit wait step (%v/%v), but precommits does not have any +2/3 votes", 1443 height, round, 1444 )) 1445 } 1446 1447 logger.Debug("entering precommit wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1448 1449 defer func() { 1450 // Done enterPrecommitWait: 1451 cs.TriggeredTimeoutPrecommit = true 1452 cs.newStep() 1453 }() 1454 1455 // wait for some more precommits; enterNewRound 1456 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1457 } 1458 1459 // Enter: +2/3 precommits for block 1460 func (cs *State) enterCommit(height int64, commitRound int32) { 1461 logger := cs.Logger.With("height", height, "commit_round", commitRound) 1462 1463 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1464 logger.Debug( 1465 "entering commit step with invalid args", 1466 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1467 ) 1468 return 1469 } 1470 1471 logger.Debug("entering commit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1472 1473 defer func() { 1474 // Done enterCommit: 1475 // keep cs.Round the same, commitRound points to the right Precommits set. 1476 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1477 cs.CommitRound = commitRound 1478 cs.CommitTime = tmtime.Now() 1479 cs.newStep() 1480 1481 // Maybe finalize immediately. 1482 cs.tryFinalizeCommit(height) 1483 }() 1484 1485 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1486 if !ok { 1487 panic("RunActionCommit() expects +2/3 precommits") 1488 } 1489 1490 // The Locked* fields no longer matter. 1491 // Move them over to ProposalBlock if they match the commit hash, 1492 // otherwise they'll be cleared in updateToState. 1493 if cs.LockedBlock.HashesTo(blockID.Hash) { 1494 logger.Debug("commit is for a locked block; set ProposalBlock=LockedBlock", "block_hash", blockID.Hash) 1495 cs.ProposalBlock = cs.LockedBlock 1496 cs.ProposalBlockParts = cs.LockedBlockParts 1497 } 1498 1499 // If we don't have the block being committed, set up to get it. 1500 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1501 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1502 logger.Info( 1503 "commit is for a block we do not know about; set ProposalBlock=nil", 1504 "proposal", cs.ProposalBlock.Hash(), 1505 "commit", blockID.Hash, 1506 ) 1507 1508 // We're getting the wrong block. 1509 // Set up ProposalBlockParts and keep waiting. 1510 cs.ProposalBlock = nil 1511 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1512 1513 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1514 logger.Error("failed publishing valid block", "err", err) 1515 } 1516 1517 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 1518 } 1519 } 1520 } 1521 1522 // If we have the block AND +2/3 commits for it, finalize. 1523 func (cs *State) tryFinalizeCommit(height int64) { 1524 logger := cs.Logger.With("height", height) 1525 1526 if cs.Height != height { 1527 panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) 1528 } 1529 1530 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1531 if !ok || len(blockID.Hash) == 0 { 1532 logger.Error("failed attempt to finalize commit; there was no +2/3 majority or +2/3 was for nil") 1533 return 1534 } 1535 1536 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1537 // TODO: this happens every time if we're not a validator (ugly logs) 1538 // TODO: ^^ wait, why does it matter that we're a validator? 1539 logger.Debug( 1540 "failed attempt to finalize commit; we do not have the commit block", 1541 "proposal_block", cs.ProposalBlock.Hash(), 1542 "commit_block", blockID.Hash, 1543 ) 1544 return 1545 } 1546 1547 cs.finalizeCommit(height) 1548 } 1549 1550 // Increment height and goto cstypes.RoundStepNewHeight 1551 func (cs *State) finalizeCommit(height int64) { 1552 logger := cs.Logger.With("height", height) 1553 1554 if cs.Height != height || cs.Step != cstypes.RoundStepCommit { 1555 logger.Debug( 1556 "entering finalize commit step", 1557 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1558 ) 1559 return 1560 } 1561 1562 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1563 block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts 1564 1565 if !ok { 1566 panic("cannot finalize commit; commit does not have 2/3 majority") 1567 } 1568 if !blockParts.HasHeader(blockID.PartSetHeader) { 1569 panic("expected ProposalBlockParts header to be commit header") 1570 } 1571 if !block.HashesTo(blockID.Hash) { 1572 panic("cannot finalize commit; proposal block does not hash to commit hash") 1573 } 1574 1575 if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { 1576 panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) 1577 } 1578 1579 logger.Info( 1580 "finalizing commit of block", 1581 "hash", block.Hash(), 1582 "root", block.AppHash, 1583 "num_txs", len(block.Txs), 1584 ) 1585 logger.Debug(fmt.Sprintf("%v", block)) 1586 1587 fail.Fail() // XXX 1588 1589 // Save to blockStore. 1590 if cs.blockStore.Height() < block.Height { 1591 // NOTE: the seenCommit is local justification to commit this block, 1592 // but may differ from the LastCommit included in the next block 1593 precommits := cs.Votes.Precommits(cs.CommitRound) 1594 seenCommit := precommits.MakeCommit() 1595 cs.blockStore.SaveBlock(block, blockParts, seenCommit) 1596 } else { 1597 // Happens during replay if we already saved the block but didn't commit 1598 logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) 1599 } 1600 1601 fail.Fail() // XXX 1602 1603 // Write EndHeightMessage{} for this height, implying that the blockstore 1604 // has saved the block. 1605 // 1606 // If we crash before writing this EndHeightMessage{}, we will recover by 1607 // running ApplyBlock during the ABCI handshake when we restart. If we 1608 // didn't save the block to the blockstore before writing 1609 // EndHeightMessage{}, we'd have to change WAL replay -- currently it 1610 // complains about replaying for heights where an #ENDHEIGHT entry already 1611 // exists. 1612 // 1613 // Either way, the State should not be resumed until we 1614 // successfully call ApplyBlock (ie. later here, or in Handshake after 1615 // restart). 1616 endMsg := EndHeightMessage{height} 1617 if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync 1618 panic(fmt.Sprintf( 1619 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 1620 endMsg, err, 1621 )) 1622 } 1623 1624 fail.Fail() // XXX 1625 1626 // Create a copy of the state for staging and an event cache for txs. 1627 stateCopy := cs.state.Copy() 1628 1629 // Execute and commit the block, update and save the state, and update the mempool. 1630 // NOTE The block.AppHash wont reflect these txs until the next block. 1631 var ( 1632 err error 1633 retainHeight int64 1634 ) 1635 1636 stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( 1637 stateCopy, 1638 types.BlockID{ 1639 Hash: block.Hash(), 1640 PartSetHeader: blockParts.Header(), 1641 }, 1642 block, 1643 ) 1644 fmt.Println("applyBlock--",block.Height) 1645 if err != nil { 1646 logger.Error("failed to apply block", "err", err) 1647 return 1648 } 1649 1650 fail.Fail() // XXX 1651 1652 // Prune old heights, if requested by ABCI app. 1653 if retainHeight > 0 { 1654 pruned, err := cs.pruneBlocks(retainHeight) 1655 if err != nil { 1656 logger.Error("failed to prune blocks", "retain_height", retainHeight, "err", err) 1657 } else { 1658 logger.Info("pruned blocks", "pruned", pruned, "retain_height", retainHeight) 1659 } 1660 } 1661 1662 // must be called before we update state 1663 cs.recordMetrics(height, block) 1664 1665 // NewHeightStep! 1666 cs.updateToState(stateCopy) 1667 1668 fail.Fail() // XXX 1669 1670 // Private validator might have changed it's key pair => refetch pubkey. 1671 if err := cs.updatePrivValidatorPubKey(); err != nil { 1672 logger.Error("failed to get private validator pubkey", "err", err) 1673 } 1674 1675 // cs.StartTime is already set. 1676 // Schedule Round0 to start soon. 1677 cs.scheduleRound0(&cs.RoundState) 1678 1679 // By here, 1680 // * cs.Height has been increment to height+1 1681 // * cs.Step is now cstypes.RoundStepNewHeight 1682 // * cs.StartTime is set to when we will start round0. 1683 } 1684 1685 func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { 1686 base := cs.blockStore.Base() 1687 if retainHeight <= base { 1688 return 0, nil 1689 } 1690 pruned, err := cs.blockStore.PruneBlocks(retainHeight) 1691 if err != nil { 1692 return 0, fmt.Errorf("failed to prune block store: %w", err) 1693 } 1694 err = cs.blockExec.Store().PruneStates(base, retainHeight) 1695 if err != nil { 1696 return 0, fmt.Errorf("failed to prune state database: %w", err) 1697 } 1698 return pruned, nil 1699 } 1700 1701 func (cs *State) recordMetrics(height int64, block *types.Block) { 1702 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1703 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1704 1705 var ( 1706 missingValidators int 1707 missingValidatorsPower int64 1708 ) 1709 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1710 // Remember that the first LastCommit is intentionally empty, so it's not 1711 // fair to increment missing validators number. 1712 if height > cs.state.InitialHeight { 1713 // Sanity check that commit size matches validator set size - only applies 1714 // after first block. 1715 var ( 1716 commitSize = block.LastCommit.Size() 1717 valSetLen = len(cs.LastValidators.Validators) 1718 address types.Address 1719 ) 1720 if commitSize != valSetLen { 1721 panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1722 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1723 } 1724 1725 if cs.privValidator != nil { 1726 if cs.privValidatorPubKey == nil { 1727 // Metrics won't be updated, but it's not critical. 1728 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1729 } else { 1730 address = cs.privValidatorPubKey.Address() 1731 } 1732 } 1733 1734 for i, val := range cs.LastValidators.Validators { 1735 commitSig := block.LastCommit.Signatures[i] 1736 if commitSig.Absent() { 1737 missingValidators++ 1738 missingValidatorsPower += val.VotingPower 1739 } 1740 1741 if bytes.Equal(val.Address, address) { 1742 label := []string{ 1743 "validator_address", val.Address.String(), 1744 } 1745 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1746 if commitSig.ForBlock() { 1747 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1748 } else { 1749 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1750 } 1751 } 1752 1753 } 1754 } 1755 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1756 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1757 1758 // NOTE: byzantine validators power and count is only for consensus evidence i.e. duplicate vote 1759 var ( 1760 byzantineValidatorsPower = int64(0) 1761 byzantineValidatorsCount = int64(0) 1762 ) 1763 for _, ev := range block.Evidence.Evidence { 1764 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1765 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1766 byzantineValidatorsCount++ 1767 byzantineValidatorsPower += val.VotingPower 1768 } 1769 } 1770 } 1771 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1772 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1773 1774 if height > 1 { 1775 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1776 if lastBlockMeta != nil { 1777 cs.metrics.BlockIntervalSeconds.Observe( 1778 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1779 ) 1780 } 1781 } 1782 1783 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1784 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1785 cs.metrics.BlockSizeBytes.Set(float64(block.Size())) 1786 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1787 } 1788 1789 //----------------------------------------------------------------------------- 1790 1791 func (cs *State) defaultSetProposal(proposal *types.Proposal) error { 1792 // Already have one 1793 // TODO: possibly catch double proposals 1794 if cs.Proposal != nil { 1795 return nil 1796 } 1797 1798 // Does not apply 1799 if proposal.Height != cs.Height || proposal.Round != cs.Round { 1800 return nil 1801 } 1802 1803 // Verify POLRound, which must be -1 or in range [0, proposal.Round). 1804 if proposal.POLRound < -1 || 1805 (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { 1806 return ErrInvalidProposalPOLRound 1807 } 1808 1809 p := proposal.ToProto() 1810 // Verify signature 1811 if !cs.Validators.GetProposer().PubKey.VerifySignature( 1812 types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature, 1813 ) { 1814 return ErrInvalidProposalSignature 1815 } 1816 1817 proposal.Signature = p.Signature 1818 cs.Proposal = proposal 1819 // We don't update cs.ProposalBlockParts if it is already set. 1820 // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. 1821 // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! 1822 if cs.ProposalBlockParts == nil { 1823 cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) 1824 } 1825 1826 cs.Logger.Info("received proposal", "proposal", proposal) 1827 return nil 1828 } 1829 1830 // NOTE: block is not necessarily valid. 1831 // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, 1832 // once we have the full block. 1833 func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (added bool, err error) { 1834 height, round, part := msg.Height, msg.Round, msg.Part 1835 1836 // Blocks might be reused, so round mismatch is OK 1837 if cs.Height != height { 1838 cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) 1839 return false, nil 1840 } 1841 1842 // We're not expecting a block part. 1843 if cs.ProposalBlockParts == nil { 1844 // NOTE: this can happen when we've gone to a higher round and 1845 // then receive parts from the previous round - not necessarily a bad peer. 1846 cs.Logger.Debug( 1847 "received a block part when we are not expecting any", 1848 "height", height, 1849 "round", round, 1850 "index", part.Index, 1851 "peer", peerID, 1852 ) 1853 return false, nil 1854 } 1855 1856 added, err = cs.ProposalBlockParts.AddPart(part) 1857 if err != nil { 1858 return added, err 1859 } 1860 if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { 1861 return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", 1862 cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, 1863 ) 1864 } 1865 if added && cs.ProposalBlockParts.IsComplete() { 1866 bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) 1867 if err != nil { 1868 return added, err 1869 } 1870 1871 var pbb = new(tmproto.Block) 1872 err = proto.Unmarshal(bz, pbb) 1873 if err != nil { 1874 return added, err 1875 } 1876 1877 block, err := types.BlockFromProto(pbb) 1878 if err != nil { 1879 return added, err 1880 } 1881 1882 cs.ProposalBlock = block 1883 1884 // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal 1885 cs.Logger.Info("received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) 1886 1887 if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { 1888 cs.Logger.Error("failed publishing event complete proposal", "err", err) 1889 } 1890 1891 // Update Valid* if we can. 1892 prevotes := cs.Votes.Prevotes(cs.Round) 1893 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 1894 if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { 1895 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1896 cs.Logger.Debug( 1897 "updating valid block to new proposal block", 1898 "valid_round", cs.Round, 1899 "valid_block_hash", cs.ProposalBlock.Hash(), 1900 ) 1901 1902 cs.ValidRound = cs.Round 1903 cs.ValidBlock = cs.ProposalBlock 1904 cs.ValidBlockParts = cs.ProposalBlockParts 1905 } 1906 // TODO: In case there is +2/3 majority in Prevotes set for some 1907 // block and cs.ProposalBlock contains different block, either 1908 // proposer is faulty or voting power of faulty processes is more 1909 // than 1/3. We should trigger in the future accountability 1910 // procedure at this point. 1911 } 1912 1913 if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { 1914 // Move onto the next step 1915 cs.enterPrevote(height, cs.Round) 1916 if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added 1917 cs.enterPrecommit(height, cs.Round) 1918 } 1919 } else if cs.Step == cstypes.RoundStepCommit { 1920 // If we're waiting on the proposal block... 1921 cs.tryFinalizeCommit(height) 1922 } 1923 1924 return added, nil 1925 } 1926 1927 return added, nil 1928 } 1929 1930 // Attempt to add the vote. if its a duplicate signature, dupeout the validator 1931 func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { 1932 added, err := cs.addVote(vote, peerID) 1933 if err != nil { 1934 // If the vote height is off, we'll just ignore it, 1935 // But if it's a conflicting sig, add it to the cs.evpool. 1936 // If it's otherwise invalid, punish peer. 1937 // nolint: gocritic 1938 if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { 1939 if cs.privValidatorPubKey == nil { 1940 return false, errPubKeyIsNotSet 1941 } 1942 1943 if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { 1944 cs.Logger.Error( 1945 "found conflicting vote from ourselves; did you unsafe_reset a validator?", 1946 "height", vote.Height, 1947 "round", vote.Round, 1948 "type", vote.Type, 1949 ) 1950 1951 return added, err 1952 } 1953 1954 // report conflicting votes to the evidence pool 1955 cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) 1956 cs.Logger.Info( 1957 "found and sent conflicting votes to the evidence pool", 1958 "vote_a", voteErr.VoteA, 1959 "vote_b", voteErr.VoteB, 1960 ) 1961 1962 return added, err 1963 } else if err == types.ErrVoteNonDeterministicSignature { 1964 cs.Logger.Debug("vote has non-deterministic signature", "err", err) 1965 } else { 1966 // Either 1967 // 1) bad peer OR 1968 // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR 1969 // 3) tmkms use with multiple validators connecting to a single tmkms instance 1970 // (https://github.com/tendermint/tendermint/issues/3839). 1971 cs.Logger.Info("failed attempting to add vote", "err", err) 1972 return added, ErrAddingVote 1973 } 1974 } 1975 1976 return added, nil 1977 } 1978 1979 func (cs *State) addVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 1980 cs.Logger.Debug( 1981 "adding vote", 1982 "vote_height", vote.Height, 1983 "vote_type", vote.Type, 1984 "val_index", vote.ValidatorIndex, 1985 "cs_height", cs.Height, 1986 ) 1987 1988 // A precommit for the previous height? 1989 // These come in while we wait timeoutCommit 1990 if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { 1991 if cs.Step != cstypes.RoundStepNewHeight { 1992 // Late precommit at prior height is ignored 1993 cs.Logger.Debug("precommit vote came in after commit timeout and has been ignored", "vote", vote) 1994 return 1995 } 1996 1997 added, err = cs.LastCommit.AddVote(vote) 1998 if !added { 1999 return 2000 } 2001 2002 cs.Logger.Debug("added vote to last precommits", "last_commit", cs.LastCommit.StringShort()) 2003 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2004 return added, err 2005 } 2006 2007 cs.evsw.FireEvent(types.EventVote, vote) 2008 2009 // if we can skip timeoutCommit and have all the votes now, 2010 if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { 2011 // go straight to new round (skip timeout commit) 2012 // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) 2013 cs.enterNewRound(cs.Height, 0) 2014 } 2015 2016 return 2017 } 2018 2019 // Height mismatch is ignored. 2020 // Not necessarily a bad peer, but not favourable behaviour. 2021 if vote.Height != cs.Height { 2022 cs.Logger.Debug("vote ignored and not added", "vote_height", vote.Height, "cs_height", cs.Height, "peer", peerID) 2023 return 2024 } 2025 2026 height := cs.Height 2027 added, err = cs.Votes.AddVote(vote, peerID) 2028 if !added { 2029 // Either duplicate, or error upon cs.Votes.AddByIndex() 2030 return 2031 } 2032 2033 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2034 return added, err 2035 } 2036 cs.evsw.FireEvent(types.EventVote, vote) 2037 2038 switch vote.Type { 2039 case tmproto.PrevoteType: 2040 prevotes := cs.Votes.Prevotes(vote.Round) 2041 cs.Logger.Debug("added vote to prevote", "vote", vote, "prevotes", prevotes.StringShort()) 2042 2043 // If +2/3 prevotes for a block or nil for *any* round: 2044 if blockID, ok := prevotes.TwoThirdsMajority(); ok { 2045 // There was a polka! 2046 // If we're locked but this is a recent polka, unlock. 2047 // If it matches our ProposalBlock, update the ValidBlock 2048 2049 // Unlock if `cs.LockedRound < vote.Round <= cs.Round` 2050 // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round 2051 if (cs.LockedBlock != nil) && 2052 (cs.LockedRound < vote.Round) && 2053 (vote.Round <= cs.Round) && 2054 !cs.LockedBlock.HashesTo(blockID.Hash) { 2055 2056 cs.Logger.Debug("unlocking because of POL", "locked_round", cs.LockedRound, "pol_round", vote.Round) 2057 2058 cs.LockedRound = -1 2059 cs.LockedBlock = nil 2060 cs.LockedBlockParts = nil 2061 2062 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 2063 return added, err 2064 } 2065 } 2066 2067 // Update Valid* if we can. 2068 // NOTE: our proposal block may be nil or not what received a polka.. 2069 if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { 2070 if cs.ProposalBlock.HashesTo(blockID.Hash) { 2071 cs.Logger.Debug("updating valid block because of POL", "valid_round", cs.ValidRound, "pol_round", vote.Round) 2072 cs.ValidRound = vote.Round 2073 cs.ValidBlock = cs.ProposalBlock 2074 cs.ValidBlockParts = cs.ProposalBlockParts 2075 } else { 2076 cs.Logger.Debug( 2077 "valid block we do not know about; set ProposalBlock=nil", 2078 "proposal", cs.ProposalBlock.Hash(), 2079 "block_id", blockID.Hash, 2080 ) 2081 2082 // we're getting the wrong block 2083 cs.ProposalBlock = nil 2084 } 2085 2086 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 2087 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 2088 } 2089 2090 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 2091 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 2092 return added, err 2093 } 2094 } 2095 } 2096 2097 // If +2/3 prevotes for *anything* for future round: 2098 switch { 2099 case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): 2100 // Round-skip if there is any 2/3+ of votes ahead of us 2101 cs.enterNewRound(height, vote.Round) 2102 2103 case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round 2104 blockID, ok := prevotes.TwoThirdsMajority() 2105 if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { 2106 cs.enterPrecommit(height, vote.Round) 2107 } else if prevotes.HasTwoThirdsAny() { 2108 cs.enterPrevoteWait(height, vote.Round) 2109 } 2110 2111 case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: 2112 // If the proposal is now complete, enter prevote of cs.Round. 2113 if cs.isProposalComplete() { 2114 cs.enterPrevote(height, cs.Round) 2115 } 2116 } 2117 2118 case tmproto.PrecommitType: 2119 precommits := cs.Votes.Precommits(vote.Round) 2120 cs.Logger.Info("added vote to precommit", "vote", vote, "precommits", precommits.StringShort()) 2121 2122 blockID, ok := precommits.TwoThirdsMajority() 2123 if ok { 2124 // Executed as TwoThirdsMajority could be from a higher round 2125 cs.enterNewRound(height, vote.Round) 2126 cs.enterPrecommit(height, vote.Round) 2127 2128 if len(blockID.Hash) != 0 { 2129 cs.enterCommit(height, vote.Round) 2130 if cs.config.SkipTimeoutCommit && precommits.HasAll() { 2131 cs.enterNewRound(cs.Height, 0) 2132 } 2133 } else { 2134 cs.enterPrecommitWait(height, vote.Round) 2135 } 2136 } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { 2137 cs.enterNewRound(height, vote.Round) 2138 cs.enterPrecommitWait(height, vote.Round) 2139 } 2140 2141 default: 2142 panic(fmt.Sprintf("unexpected vote type %v", vote.Type)) 2143 } 2144 2145 return added, err 2146 } 2147 2148 // CONTRACT: cs.privValidator is not nil. 2149 func (cs *State) signVote( 2150 msgType tmproto.SignedMsgType, 2151 hash []byte, 2152 header types.PartSetHeader, 2153 ) (*types.Vote, error) { 2154 // Flush the WAL. Otherwise, we may not recompute the same vote to sign, 2155 // and the privValidator will refuse to sign anything. 2156 if err := cs.wal.FlushAndSync(); err != nil { 2157 return nil, err 2158 } 2159 2160 if cs.privValidatorPubKey == nil { 2161 return nil, errPubKeyIsNotSet 2162 } 2163 2164 addr := cs.privValidatorPubKey.Address() 2165 valIdx, _ := cs.Validators.GetByAddress(addr) 2166 2167 vote := &types.Vote{ 2168 ValidatorAddress: addr, 2169 ValidatorIndex: valIdx, 2170 Height: cs.Height, 2171 Round: cs.Round, 2172 Timestamp: cs.voteTime(), 2173 Type: msgType, 2174 BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, 2175 } 2176 2177 v := vote.ToProto() 2178 err := cs.privValidator.SignVote(cs.state.ChainID, v) 2179 vote.Signature = v.Signature 2180 2181 return vote, err 2182 } 2183 2184 func (cs *State) voteTime() time.Time { 2185 now := tmtime.Now() 2186 minVoteTime := now 2187 // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, 2188 // even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/. 2189 timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond 2190 if cs.LockedBlock != nil { 2191 // See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html 2192 minVoteTime = cs.LockedBlock.Time.Add(timeIota) 2193 } else if cs.ProposalBlock != nil { 2194 minVoteTime = cs.ProposalBlock.Time.Add(timeIota) 2195 } 2196 2197 if now.After(minVoteTime) { 2198 return now 2199 } 2200 return minVoteTime 2201 } 2202 2203 // sign the vote and publish on internalMsgQueue 2204 func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { 2205 if cs.privValidator == nil { // the node does not have a key 2206 return nil 2207 } 2208 2209 if cs.privValidatorPubKey == nil { 2210 // Vote won't be signed, but it's not critical. 2211 cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) 2212 return nil 2213 } 2214 2215 // If the node not in the validator set, do nothing. 2216 if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { 2217 return nil 2218 } 2219 2220 // TODO: pass pubKey to signVote 2221 vote, err := cs.signVote(msgType, hash, header) 2222 if err == nil { 2223 cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""}) 2224 cs.Logger.Info("signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) 2225 return vote 2226 } 2227 2228 cs.Logger.Error("failed signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) 2229 return nil 2230 } 2231 2232 // updatePrivValidatorPubKey get's the private validator public key and 2233 // memoizes it. This func returns an error if the private validator is not 2234 // responding or responds with an error. 2235 func (cs *State) updatePrivValidatorPubKey() error { 2236 if cs.privValidator == nil { 2237 return nil 2238 } 2239 2240 pubKey, err := cs.privValidator.GetPubKey() 2241 if err != nil { 2242 return err 2243 } 2244 cs.privValidatorPubKey = pubKey 2245 return nil 2246 } 2247 2248 // look back to check existence of the node's consensus votes before joining consensus 2249 func (cs *State) checkDoubleSigningRisk(height int64) error { 2250 if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { 2251 valAddr := cs.privValidatorPubKey.Address() 2252 doubleSignCheckHeight := cs.config.DoubleSignCheckHeight 2253 if doubleSignCheckHeight > height { 2254 doubleSignCheckHeight = height 2255 } 2256 2257 for i := int64(1); i < doubleSignCheckHeight; i++ { 2258 lastCommit := cs.blockStore.LoadSeenCommit(height - i) 2259 if lastCommit != nil { 2260 for sigIdx, s := range lastCommit.Signatures { 2261 if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { 2262 cs.Logger.Info("found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i) 2263 return ErrSignatureFoundInPastBlocks 2264 } 2265 } 2266 } 2267 } 2268 } 2269 2270 return nil 2271 } 2272 2273 //--------------------------------------------------------- 2274 2275 func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 2276 if h1 < h2 { 2277 return -1 2278 } else if h1 > h2 { 2279 return 1 2280 } 2281 if r1 < r2 { 2282 return -1 2283 } else if r1 > r2 { 2284 return 1 2285 } 2286 if s1 < s2 { 2287 return -1 2288 } else if s1 > s2 { 2289 return 1 2290 } 2291 return 0 2292 } 2293 2294 // repairWalFile decodes messages from src (until the decoder errors) and 2295 // writes them to dst. 2296 func repairWalFile(src, dst string) error { 2297 in, err := os.Open(src) 2298 if err != nil { 2299 return err 2300 } 2301 defer in.Close() 2302 2303 out, err := os.Create(dst) 2304 if err != nil { 2305 return err 2306 } 2307 defer out.Close() 2308 2309 var ( 2310 dec = NewWALDecoder(in) 2311 enc = NewWALEncoder(out) 2312 ) 2313 2314 // best-case repair (until first error is encountered) 2315 for { 2316 msg, err := dec.Decode() 2317 if err != nil { 2318 break 2319 } 2320 2321 err = enc.Encode(msg) 2322 if err != nil { 2323 return fmt.Errorf("failed to encode msg: %w", err) 2324 } 2325 } 2326 2327 return nil 2328 }