// Source: bitbucket.org/number571/tendermint@v0.8.14/internal/consensus/state.go

package consensus

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"runtime/debug"
	"time"

	"github.com/gogo/protobuf/proto"

	cfg "bitbucket.org/number571/tendermint/config"
	"bitbucket.org/number571/tendermint/crypto"
	cstypes "bitbucket.org/number571/tendermint/internal/consensus/types"
	"bitbucket.org/number571/tendermint/internal/libs/fail"
	tmsync "bitbucket.org/number571/tendermint/internal/libs/sync"
	tmevents "bitbucket.org/number571/tendermint/libs/events"
	tmjson "bitbucket.org/number571/tendermint/libs/json"
	"bitbucket.org/number571/tendermint/libs/log"
	tmmath "bitbucket.org/number571/tendermint/libs/math"
	tmos "bitbucket.org/number571/tendermint/libs/os"
	"bitbucket.org/number571/tendermint/libs/service"
	tmtime "bitbucket.org/number571/tendermint/libs/time"
	"bitbucket.org/number571/tendermint/privval"
	tmgrpc "bitbucket.org/number571/tendermint/privval/grpc"
	tmproto "bitbucket.org/number571/tendermint/proto/tendermint/types"
	sm "bitbucket.org/number571/tendermint/state"
	"bitbucket.org/number571/tendermint/types"
)

// Consensus sentinel errors
var (
	ErrInvalidProposalSignature   = errors.New("error invalid proposal signature")
	ErrInvalidProposalPOLRound    = errors.New("error invalid proposal POL round")
	ErrAddingVote                 = errors.New("error adding vote")
	ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key")

	errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors")
)

// msgQueueSize is the buffer capacity of the peer, internal and stats message
// channels allocated in NewState.
var msgQueueSize = 1000

// msgs from the reactor which may update the state
type msgInfo struct {
	Msg    Message      `json:"msg"`
	PeerID types.NodeID `json:"peer_key"` // empty for internally generated messages
}

// internally generated messages which may update the state
type timeoutInfo struct {
	Duration time.Duration         `json:"duration"`
	Height   int64                 `json:"height"`
	Round    int32                 `json:"round"`
	Step     cstypes.RoundStepType `json:"step"`
}

// String renders the timeout as "<duration> ; <height>/<round> <step>".
func (ti *timeoutInfo) String() string {
	return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step)
}

// interface to the mempool
type txNotifier interface {
	TxsAvailable() <-chan struct{}
}

// interface to the evidence pool
type evidencePool interface {
	// reports conflicting votes to the evidence pool to be processed into evidence
	ReportConflictingVotes(voteA, voteB *types.Vote)
}

// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
type State struct {
	service.BaseService

	// config details
	config            *cfg.ConsensusConfig
	privValidator     types.PrivValidator // for signing votes
	privValidatorType types.PrivValidatorType

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state
	mtx tmsync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker

	// information about added votes and block parts are written on this channel
	// so statistics can be computed by reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)
	doPrevote      func(height int64, round int32)
	setProposal    func(proposal *types.Proposal) error

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *Metrics

	// wait the channel event happening for shutting down the state gracefully
	onStopCh chan *cstypes.RoundState
}

// StateOption sets an optional parameter on the State.
type StateOption func(*State)

// NewState returns a new State.
154 func NewState( 155 config *cfg.ConsensusConfig, 156 state sm.State, 157 blockExec *sm.BlockExecutor, 158 blockStore sm.BlockStore, 159 txNotifier txNotifier, 160 evpool evidencePool, 161 options ...StateOption, 162 ) *State { 163 cs := &State{ 164 config: config, 165 blockExec: blockExec, 166 blockStore: blockStore, 167 txNotifier: txNotifier, 168 peerMsgQueue: make(chan msgInfo, msgQueueSize), 169 internalMsgQueue: make(chan msgInfo, msgQueueSize), 170 timeoutTicker: NewTimeoutTicker(), 171 statsMsgQueue: make(chan msgInfo, msgQueueSize), 172 done: make(chan struct{}), 173 doWALCatchup: true, 174 wal: nilWAL{}, 175 evpool: evpool, 176 evsw: tmevents.NewEventSwitch(), 177 metrics: NopMetrics(), 178 onStopCh: make(chan *cstypes.RoundState), 179 } 180 181 // set function defaults (may be overwritten before calling Start) 182 cs.decideProposal = cs.defaultDecideProposal 183 cs.doPrevote = cs.defaultDoPrevote 184 cs.setProposal = cs.defaultSetProposal 185 186 // We have no votes, so reconstruct LastCommit from SeenCommit. 187 if state.LastBlockHeight > 0 { 188 cs.reconstructLastCommit(state) 189 } 190 191 cs.updateToState(state) 192 193 // NOTE: we do not call scheduleRound0 yet, we do that upon Start() 194 195 cs.BaseService = *service.NewBaseService(nil, "State", cs) 196 for _, option := range options { 197 option(cs) 198 } 199 200 return cs 201 } 202 203 // SetLogger implements Service. 204 func (cs *State) SetLogger(l log.Logger) { 205 cs.BaseService.Logger = l 206 cs.timeoutTicker.SetLogger(l) 207 } 208 209 // SetEventBus sets event bus. 210 func (cs *State) SetEventBus(b *types.EventBus) { 211 cs.eventBus = b 212 cs.blockExec.SetEventBus(b) 213 } 214 215 // StateMetrics sets the metrics. 216 func StateMetrics(metrics *Metrics) StateOption { 217 return func(cs *State) { cs.metrics = metrics } 218 } 219 220 // String returns a string. 
221 func (cs *State) String() string { 222 // better not to access shared variables 223 return "ConsensusState" 224 } 225 226 // GetState returns a copy of the chain state. 227 func (cs *State) GetState() sm.State { 228 cs.mtx.RLock() 229 defer cs.mtx.RUnlock() 230 return cs.state.Copy() 231 } 232 233 // GetLastHeight returns the last height committed. 234 // If there were no blocks, returns 0. 235 func (cs *State) GetLastHeight() int64 { 236 cs.mtx.RLock() 237 defer cs.mtx.RUnlock() 238 return cs.RoundState.Height - 1 239 } 240 241 // GetRoundState returns a shallow copy of the internal consensus state. 242 func (cs *State) GetRoundState() *cstypes.RoundState { 243 cs.mtx.RLock() 244 rs := cs.RoundState // copy 245 cs.mtx.RUnlock() 246 return &rs 247 } 248 249 // GetRoundStateJSON returns a json of RoundState. 250 func (cs *State) GetRoundStateJSON() ([]byte, error) { 251 cs.mtx.RLock() 252 defer cs.mtx.RUnlock() 253 return tmjson.Marshal(cs.RoundState) 254 } 255 256 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 257 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 258 cs.mtx.RLock() 259 defer cs.mtx.RUnlock() 260 return tmjson.Marshal(cs.RoundState.RoundStateSimple()) 261 } 262 263 // GetValidators returns a copy of the current validators. 264 func (cs *State) GetValidators() (int64, []*types.Validator) { 265 cs.mtx.RLock() 266 defer cs.mtx.RUnlock() 267 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 268 } 269 270 // SetPrivValidator sets the private validator account for signing votes. It 271 // immediately requests pubkey and caches it. 
272 func (cs *State) SetPrivValidator(priv types.PrivValidator) { 273 cs.mtx.Lock() 274 defer cs.mtx.Unlock() 275 276 cs.privValidator = priv 277 278 if priv != nil { 279 switch t := priv.(type) { 280 case *privval.RetrySignerClient: 281 cs.privValidatorType = types.RetrySignerClient 282 case *privval.FilePV: 283 cs.privValidatorType = types.FileSignerClient 284 case *privval.SignerClient: 285 cs.privValidatorType = types.SignerSocketClient 286 case *tmgrpc.SignerClient: 287 cs.privValidatorType = types.SignerGRPCClient 288 case types.MockPV: 289 cs.privValidatorType = types.MockSignerClient 290 case *types.ErroringMockPV: 291 cs.privValidatorType = types.ErrorMockSignerClient 292 default: 293 cs.Logger.Error("unsupported priv validator type", "err", 294 fmt.Errorf("error privValidatorType %s", t)) 295 } 296 } 297 298 if err := cs.updatePrivValidatorPubKey(); err != nil { 299 cs.Logger.Error("failed to get private validator pubkey", "err", err) 300 } 301 } 302 303 // SetTimeoutTicker sets the local timer. It may be useful to overwrite for 304 // testing. 305 func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { 306 cs.mtx.Lock() 307 cs.timeoutTicker = timeoutTicker 308 cs.mtx.Unlock() 309 } 310 311 // LoadCommit loads the commit for a given height. 312 func (cs *State) LoadCommit(height int64) *types.Commit { 313 cs.mtx.RLock() 314 defer cs.mtx.RUnlock() 315 316 if height == cs.blockStore.Height() { 317 return cs.blockStore.LoadSeenCommit(height) 318 } 319 320 return cs.blockStore.LoadBlockCommit(height) 321 } 322 323 // OnStart loads the latest state via the WAL, and starts the timeout and 324 // receive routines. 325 func (cs *State) OnStart() error { 326 // We may set the WAL in testing before calling Start, so only OpenWAL if its 327 // still the nilWAL. 
328 if _, ok := cs.wal.(nilWAL); ok { 329 if err := cs.loadWalFile(); err != nil { 330 return err 331 } 332 } 333 334 // We may have lost some votes if the process crashed reload from consensus 335 // log to catchup. 336 if cs.doWALCatchup { 337 repairAttempted := false 338 339 LOOP: 340 for { 341 err := cs.catchupReplay(cs.Height) 342 switch { 343 case err == nil: 344 break LOOP 345 346 case !IsDataCorruptionError(err): 347 cs.Logger.Error("error on catchup replay; proceeding to start state anyway", "err", err) 348 break LOOP 349 350 case repairAttempted: 351 return err 352 } 353 354 cs.Logger.Error("the WAL file is corrupted; attempting repair", "err", err) 355 356 // 1) prep work 357 if err := cs.wal.Stop(); err != nil { 358 return err 359 } 360 361 repairAttempted = true 362 363 // 2) backup original WAL file 364 corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) 365 if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { 366 return err 367 } 368 369 cs.Logger.Debug("backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) 370 371 // 3) try to repair (WAL file will be overwritten!) 372 if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { 373 cs.Logger.Error("the WAL repair failed", "err", err) 374 return err 375 } 376 377 cs.Logger.Info("successful WAL repair") 378 379 // reload WAL file 380 if err := cs.loadWalFile(); err != nil { 381 return err 382 } 383 } 384 } 385 386 if err := cs.evsw.Start(); err != nil { 387 return err 388 } 389 390 // we need the timeoutRoutine for replay so 391 // we don't block on the tick chan. 
392 // NOTE: we will get a build up of garbage go routines 393 // firing on the tockChan until the receiveRoutine is started 394 // to deal with them (by that point, at most one will be valid) 395 if err := cs.timeoutTicker.Start(); err != nil { 396 return err 397 } 398 399 // Double Signing Risk Reduction 400 if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { 401 return err 402 } 403 404 // now start the receiveRoutine 405 go cs.receiveRoutine(0) 406 407 // schedule the first round! 408 // use GetRoundState so we don't race the receiveRoutine for access 409 cs.scheduleRound0(cs.GetRoundState()) 410 411 return nil 412 } 413 414 // timeoutRoutine: receive requests for timeouts on tickChan and fire timeouts on tockChan 415 // receiveRoutine: serializes processing of proposoals, block parts, votes; coordinates state transitions 416 func (cs *State) startRoutines(maxSteps int) { 417 err := cs.timeoutTicker.Start() 418 if err != nil { 419 cs.Logger.Error("failed to start timeout ticker", "err", err) 420 return 421 } 422 423 go cs.receiveRoutine(maxSteps) 424 } 425 426 // loadWalFile loads WAL data from file. It overwrites cs.wal. 427 func (cs *State) loadWalFile() error { 428 wal, err := cs.OpenWAL(cs.config.WalFile()) 429 if err != nil { 430 cs.Logger.Error("failed to load state WAL", "err", err) 431 return err 432 } 433 434 cs.wal = wal 435 return nil 436 } 437 438 // OnStop implements service.Service. 439 func (cs *State) OnStop() { 440 441 // If the node is committing a new block, wait until it is finished! 
442 if cs.GetRoundState().Step == cstypes.RoundStepCommit { 443 select { 444 case <-cs.onStopCh: 445 case <-time.After(cs.config.TimeoutCommit): 446 cs.Logger.Error("OnStop: timeout waiting for commit to finish", "time", cs.config.TimeoutCommit) 447 } 448 } 449 450 close(cs.onStopCh) 451 452 if err := cs.evsw.Stop(); err != nil { 453 cs.Logger.Error("failed trying to stop eventSwitch", "error", err) 454 } 455 456 if err := cs.timeoutTicker.Stop(); err != nil { 457 cs.Logger.Error("failed trying to stop timeoutTicket", "error", err) 458 } 459 // WAL is stopped in receiveRoutine. 460 } 461 462 // Wait waits for the the main routine to return. 463 // NOTE: be sure to Stop() the event switch and drain 464 // any event channels or this may deadlock 465 func (cs *State) Wait() { 466 <-cs.done 467 } 468 469 // OpenWAL opens a file to log all consensus messages and timeouts for 470 // deterministic accountability. 471 func (cs *State) OpenWAL(walFile string) (WAL, error) { 472 wal, err := NewWAL(walFile) 473 if err != nil { 474 cs.Logger.Error("failed to open WAL", "file", walFile, "err", err) 475 return nil, err 476 } 477 478 wal.SetLogger(cs.Logger.With("wal", walFile)) 479 480 if err := wal.Start(); err != nil { 481 cs.Logger.Error("failed to start WAL", "err", err) 482 return nil, err 483 } 484 485 return wal, nil 486 } 487 488 //------------------------------------------------------------ 489 // Public interface for passing messages into the consensus state, possibly causing a state transition. 490 // If peerID == "", the msg is considered internal. 491 // Messages are added to the appropriate queue (peer or internal). 492 // If the queue is full, the function may block. 493 // TODO: should these return anything or let callers just use events? 494 495 // AddVote inputs a vote. 
496 func (cs *State) AddVote(vote *types.Vote, peerID types.NodeID) (added bool, err error) { 497 if peerID == "" { 498 cs.internalMsgQueue <- msgInfo{&VoteMessage{vote}, ""} 499 } else { 500 cs.peerMsgQueue <- msgInfo{&VoteMessage{vote}, peerID} 501 } 502 503 // TODO: wait for event?! 504 return false, nil 505 } 506 507 // SetProposal inputs a proposal. 508 func (cs *State) SetProposal(proposal *types.Proposal, peerID types.NodeID) error { 509 510 if peerID == "" { 511 cs.internalMsgQueue <- msgInfo{&ProposalMessage{proposal}, ""} 512 } else { 513 cs.peerMsgQueue <- msgInfo{&ProposalMessage{proposal}, peerID} 514 } 515 516 // TODO: wait for event?! 517 return nil 518 } 519 520 // AddProposalBlockPart inputs a part of the proposal block. 521 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID types.NodeID) error { 522 523 if peerID == "" { 524 cs.internalMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, ""} 525 } else { 526 cs.peerMsgQueue <- msgInfo{&BlockPartMessage{height, round, part}, peerID} 527 } 528 529 // TODO: wait for event?! 530 return nil 531 } 532 533 // SetProposalAndBlock inputs the proposal and all block parts. 
func (cs *State) SetProposalAndBlock(
	proposal *types.Proposal,
	block *types.Block,
	parts *types.PartSet,
	peerID types.NodeID,
) error {

	// Queue the proposal first, then every part of the part set in index order.
	if err := cs.SetProposal(proposal, peerID); err != nil {
		return err
	}

	for i := 0; i < int(parts.Total()); i++ {
		part := parts.GetPart(i)
		if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil {
			return err
		}
	}

	return nil
}

//------------------------------------------------------------
// internal functions for managing the state

// updateHeight sets cs.Height and mirrors the value into the Height metric.
func (cs *State) updateHeight(height int64) {
	cs.metrics.Height.Set(float64(height))
	cs.Height = height
}

// updateRoundStep sets cs.Round and cs.Step; it emits no event.
func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) {
	cs.Round = round
	cs.Step = step
}

// enterNewRound(height, 0) at cs.StartTime.
func (cs *State) scheduleRound0(rs *cstypes.RoundState) {
	// cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime)
	sleepDuration := rs.StartTime.Sub(tmtime.Now())
	cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight)
}

// Attempt to schedule a timeout (by sending timeoutInfo on the tickChan)
func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) {
	cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step})
}

// send a msg into the receiveRoutine regarding our own proposal, block part, or vote
func (cs *State) sendInternalMessage(mi msgInfo) {
	select {
	case cs.internalMsgQueue <- mi:
	default:
		// NOTE: using the go-routine means our votes can
		// be processed out of order.
		// TODO: use CList here for strict determinism and
		// attempt push to internalMsgQueue in receiveRoutine
		cs.Logger.Debug("internal msg queue is full; using a go-routine")
		go func() { cs.internalMsgQueue <- mi }()
	}
}

// Reconstruct LastCommit from SeenCommit, which we saved along with the block,
// (which happens even before saving the state)
//
// Panics if the seen commit for state.LastBlockHeight is missing or if the
// reconstructed vote set lacks a +2/3 majority.
func (cs *State) reconstructLastCommit(state sm.State) {
	seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight)
	if seenCommit == nil {
		panic(fmt.Sprintf(
			"failed to reconstruct last commit; seen commit for height %v not found",
			state.LastBlockHeight,
		))
	}

	lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators)
	if !lastPrecommits.HasTwoThirdsMajority() {
		panic("failed to reconstruct last commit; does not have +2/3 maj")
	}

	cs.LastCommit = lastPrecommits
}

// Updates State and increments height to match that of state.
// The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight.
//
// Panics on any of the height/commit consistency violations checked below.
// When the given state is not ahead of cs.state, nothing is reset and only
// newStep is called.
func (cs *State) updateToState(state sm.State) {
	// After a commit (CommitRound > -1) the new state must be for the height
	// that was just committed.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf(
			"updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight,
		))
	}

	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height,
			))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf(
				"inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight,
			))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Debug(
				"ignoring updateToState()",
				"new_height", state.LastBlockHeight+1,
				"old_height", cs.state.LastBlockHeight+1,
			)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf(
				"wanted to form a commit, but precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight, cs.CommitRound, cs.Votes.Precommits(cs.CommitRound),
			))
		}

		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)

	case cs.LastCommit == nil:
		// NOTE: when Tendermint starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf(
			"last commit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height
	height := state.LastBlockHeight + 1
	if height == 1 {
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)

	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// And alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(tmtime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}

// newStep writes the current round-state event to the WAL, increments nSteps
// and, once an event bus is set, publishes the new round step on the bus and
// fires it on the event switch.
func (cs *State) newStep() {
	rs := cs.RoundStateEvent()
	if err := cs.wal.Write(rs); err != nil {
		cs.Logger.Error("failed writing to WAL", "err", err)
	}

	cs.nSteps++

	// newStep is called by updateToState in NewState before the eventBus is set!
	if cs.eventBus != nil {
		if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
			cs.Logger.Error("failed publishing new round step", "err", err)
		}

		cs.evsw.FireEvent(types.EventNewRoundStepValue, &cs.RoundState)
	}
}

//-----------------------------------------
// the main go routines

// receiveRoutine handles messages which may cause state transitions.
// Its argument (maxSteps) bounds cs.nSteps, the counter incremented by newStep:
// once nSteps reaches maxSteps the routine resets the counter and returns - use 0 to run forever
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
func (cs *State) receiveRoutine(maxSteps int) {
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but its ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("failed trying to stop WAL", "error", err)
		}

		cs.wal.Wait()
		close(cs.done)
	}

	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Debug("reached max steps; exiting receive routine")
				cs.nSteps = 0
				return
			}
		}

		// Snapshot of the round state taken before blocking in select; it is
		// what handleTimeout compares an incoming timeout against.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()

		case mi = <-cs.peerMsgQueue:
			// Peer messages are written without fsync; a write failure is only logged.
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)

		case mi = <-cs.internalMsgQueue:
			// Our own messages must reach the WAL before being processed;
			// a failed synced write panics.
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf(
					"failed to write %v msg to consensus WAL due to %v; check your file system and restart the node",
					mi, err,
				))
			}

			if _, ok := mi.Msg.(*VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)

		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("failed writing to WAL", "err", err)
			}

			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)

		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}

// state transitions on complete-proposal, 2/3-any, 2/3-one
//
// handleMsg holds cs.mtx for the whole call. Processing errors are logged and
// not returned; an unknown message type is logged and dropped.
func (cs *State) handleMsg(mi msgInfo) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	var (
		added bool
		err   error
	)

	msg, peerID := mi.Msg, mi.PeerID

	switch msg := msg.(type) {
	case *ProposalMessage:
		// will not cause transition.
		// once proposal is set, we can receive block parts
		err = cs.setProposal(msg.Proposal)

	case *BlockPartMessage:
		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
		added, err = cs.addProposalBlockPart(msg, peerID)
		if added {
			// Forward accepted parts on the stats queue.
			cs.statsMsgQueue <- mi
		}

		// An error for a part from a different round is logged at debug
		// level and then cleared, so it is not reported as a failure below.
		if err != nil && msg.Round != cs.Round {
			cs.Logger.Debug(
				"received block part from wrong round",
				"height", cs.Height,
				"cs_round", cs.Round,
				"block_round", msg.Round,
			)
			err = nil
		}

	case *VoteMessage:
		// attempt to add the vote and dupeout the validator if its a duplicate signature
		// if the vote gives us a 2/3-any or 2/3-one, we transition
		added, err = cs.tryAddVote(msg.Vote, peerID)
		if added {
			// Forward accepted votes on the stats queue.
			cs.statsMsgQueue <- mi
		}

		// if err == ErrAddingVote {
		// TODO: punish peer
		// We probably don't want to stop the peer here. The vote does not
		// necessarily comes from a malicious peer but can be just broadcasted by
		// a typical peer.
		// https://bitbucket.org/number571/tendermint/issues/1281
		// }

		// NOTE: the vote is broadcast to peers by the reactor listening
		// for vote events

		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
		// the peer is sending us CatchupCommit precommits.
		// We could make note of this and help filter in broadcastHasVoteMessage().

	default:
		cs.Logger.Error("unknown msg type", "type", fmt.Sprintf("%T", msg))
		return
	}

	if err != nil {
		cs.Logger.Error(
			"failed to process message",
			"height", cs.Height,
			"round", cs.Round,
			"peer", peerID,
			"err", err,
			"msg", msg,
		)
	}
}

// handleTimeout applies a fired timeout to the state machine. The timeout is
// first checked against rs, the caller's round-state snapshot; a timeout for
// another height, an earlier round, or an earlier step of the same round is
// ignored. Panics on a timeout step that has no case below.
func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
	cs.Logger.Debug("received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)

	// timeouts must be for current height, round, step
	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
		cs.Logger.Debug("ignoring tock because we are ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
		return
	}

	// the timeout will now cause a state transition
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	switch ti.Step {
	case cstypes.RoundStepNewHeight:
		// NewRound event fired from enterNewRound.
		// XXX: should we fire timeout here (for timeout commit)?
		cs.enterNewRound(ti.Height, 0)

	case cstypes.RoundStepNewRound:
		// Round is hard-coded to 0: the visible call sites only schedule
		// NewRound timeouts for round 0.
		cs.enterPropose(ti.Height, 0)

	case cstypes.RoundStepPropose:
		if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout propose", "err", err)
		}

		cs.enterPrevote(ti.Height, ti.Round)

	case cstypes.RoundStepPrevoteWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		cs.enterPrecommit(ti.Height, ti.Round)

	case cstypes.RoundStepPrecommitWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("failed publishing timeout wait", "err", err)
		}

		cs.enterPrecommit(ti.Height, ti.Round)
		cs.enterNewRound(ti.Height, ti.Round+1)

	default:
		panic(fmt.Sprintf("invalid timeout step: %v", ti.Step))
	}

}

// handleTxsAvailable reacts to the mempool signalling that transactions are
// available. It only acts in round 0: during NewHeight it schedules a
// NewRound timeout just after StartTime (unless a proof block is needed), and
// during NewRound it enters propose directly.
func (cs *State) handleTxsAvailable() {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	// We only need to do this for round 0.
	if cs.Round != 0 {
		return
	}

	switch cs.Step {
	case cstypes.RoundStepNewHeight: // timeoutCommit phase
		if cs.needProofBlock(cs.Height) {
			// enterPropose will be called by enterNewRound
			return
		}

		// +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight
		timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond
		cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound)

	case cstypes.RoundStepNewRound: // after timeoutCommit
		cs.enterPropose(cs.Height, 0)
	}
}

//-----------------------------------------------------------------------------
// State functions
// Used internally by handleTimeout and handleMsg to make state transitions

// Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit),
//   or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1)
// Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1)
// Enter: +2/3 precommits for nil at (height,round-1)
// Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round)
// NOTE: cs.StartTime was already set for height.
999 func (cs *State) enterNewRound(height int64, round int32) { 1000 logger := cs.Logger.With("height", height, "round", round) 1001 1002 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { 1003 logger.Debug( 1004 "entering new round with invalid args", 1005 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1006 ) 1007 return 1008 } 1009 1010 if now := tmtime.Now(); cs.StartTime.After(now) { 1011 logger.Debug("need to set a buffer and log message here for sanity", "start_time", cs.StartTime, "now", now) 1012 } 1013 1014 logger.Debug("entering new round", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1015 1016 // increment validators if necessary 1017 validators := cs.Validators 1018 if cs.Round < round { 1019 validators = validators.Copy() 1020 validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round)) 1021 } 1022 1023 // Setup new round 1024 // we don't fire newStep for this step, 1025 // but we fire an event, so update the round step first 1026 cs.updateRoundStep(round, cstypes.RoundStepNewRound) 1027 cs.Validators = validators 1028 if round == 0 { 1029 // We've already reset these upon new height, 1030 // and meanwhile we might have received a proposal 1031 // for round 0. 1032 } else { 1033 logger.Debug("resetting proposal info") 1034 cs.Proposal = nil 1035 cs.ProposalBlock = nil 1036 cs.ProposalBlockParts = nil 1037 } 1038 1039 cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping 1040 cs.TriggeredTimeoutPrecommit = false 1041 1042 if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { 1043 cs.Logger.Error("failed publishing new round", "err", err) 1044 } 1045 1046 cs.metrics.Rounds.Set(float64(round)) 1047 1048 // Wait for txs to be available in the mempool 1049 // before we enterPropose in round 0. 
If the last block changed the app hash, 1050 // we may need an empty "proof" block, and enterPropose immediately. 1051 waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) 1052 if waitForTxs { 1053 if cs.config.CreateEmptyBlocksInterval > 0 { 1054 cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, 1055 cstypes.RoundStepNewRound) 1056 } 1057 } else { 1058 cs.enterPropose(height, round) 1059 } 1060 } 1061 1062 // needProofBlock returns true on the first height (so the genesis app hash is signed right away) 1063 // and where the last block (height-1) caused the app hash to change 1064 func (cs *State) needProofBlock(height int64) bool { 1065 if height == cs.state.InitialHeight { 1066 return true 1067 } 1068 1069 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1070 if lastBlockMeta == nil { 1071 panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1)) 1072 } 1073 1074 return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) 1075 } 1076 1077 // Enter (CreateEmptyBlocks): from enterNewRound(height,round) 1078 // Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ): 1079 // after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval 1080 // Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool 1081 func (cs *State) enterPropose(height int64, round int32) { 1082 logger := cs.Logger.With("height", height, "round", round) 1083 1084 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) { 1085 logger.Debug( 1086 "entering propose step with invalid args", 1087 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1088 ) 1089 return 1090 } 1091 1092 logger.Debug("entering propose step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1093 1094 defer func() { 1095 // Done enterPropose: 1096 cs.updateRoundStep(round, cstypes.RoundStepPropose) 
1097 cs.newStep() 1098 1099 // If we have the whole proposal + POL, then goto Prevote now. 1100 // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart), 1101 // or else after timeoutPropose 1102 if cs.isProposalComplete() { 1103 cs.enterPrevote(height, cs.Round) 1104 } 1105 }() 1106 1107 // If we don't get the proposal and all block parts quick enough, enterPrevote 1108 cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose) 1109 1110 // Nothing more to do if we're not a validator 1111 if cs.privValidator == nil { 1112 logger.Debug("node is not a validator") 1113 return 1114 } 1115 1116 logger.Debug("node is a validator") 1117 1118 if cs.privValidatorPubKey == nil { 1119 // If this node is a validator & proposer in the current round, it will 1120 // miss the opportunity to create a block. 1121 logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1122 return 1123 } 1124 1125 address := cs.privValidatorPubKey.Address() 1126 1127 // if not a validator, we're done 1128 if !cs.Validators.HasAddress(address) { 1129 logger.Debug("node is not a validator", "addr", address, "vals", cs.Validators) 1130 return 1131 } 1132 1133 if cs.isProposer(address) { 1134 logger.Debug( 1135 "propose step; our turn to propose", 1136 "proposer", address, 1137 ) 1138 1139 cs.decideProposal(height, round) 1140 } else { 1141 logger.Debug( 1142 "propose step; not our turn to propose", 1143 "proposer", cs.Validators.GetProposer().Address, 1144 ) 1145 } 1146 } 1147 1148 func (cs *State) isProposer(address []byte) bool { 1149 return bytes.Equal(cs.Validators.GetProposer().Address, address) 1150 } 1151 1152 func (cs *State) defaultDecideProposal(height int64, round int32) { 1153 var block *types.Block 1154 var blockParts *types.PartSet 1155 1156 // Decide on block 1157 if cs.ValidBlock != nil { 1158 // If there is valid block, choose that. 
1159 block, blockParts = cs.ValidBlock, cs.ValidBlockParts 1160 } else { 1161 // Create a new proposal block from state/txs from the mempool. 1162 block, blockParts = cs.createProposalBlock() 1163 if block == nil { 1164 return 1165 } 1166 } 1167 1168 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1169 // and the privValidator will refuse to sign anything. 1170 if err := cs.wal.FlushAndSync(); err != nil { 1171 cs.Logger.Error("failed flushing WAL to disk") 1172 } 1173 1174 // Make proposal 1175 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1176 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1177 p := proposal.ToProto() 1178 1179 // wait the max amount we would wait for a proposal 1180 ctx, cancel := context.WithTimeout(context.TODO(), cs.config.TimeoutPropose) 1181 defer cancel() 1182 if err := cs.privValidator.SignProposal(ctx, cs.state.ChainID, p); err == nil { 1183 proposal.Signature = p.Signature 1184 1185 // send proposal and block parts on internal msg queue 1186 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 1187 1188 for i := 0; i < int(blockParts.Total()); i++ { 1189 part := blockParts.GetPart(i) 1190 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 1191 } 1192 1193 cs.Logger.Debug("signed proposal", "height", height, "round", round, "proposal", proposal) 1194 } else if !cs.replayMode { 1195 cs.Logger.Error("propose step; failed signing proposal", "height", height, "round", round, "err", err) 1196 } 1197 } 1198 1199 // Returns true if the proposal block is complete && 1200 // (if POLRound was proposed, we have +2/3 prevotes from there). 1201 func (cs *State) isProposalComplete() bool { 1202 if cs.Proposal == nil || cs.ProposalBlock == nil { 1203 return false 1204 } 1205 // we have the proposal. 
if there's a POLRound, 1206 // make sure we have the prevotes from it too 1207 if cs.Proposal.POLRound < 0 { 1208 return true 1209 } 1210 // if this is false the proposer is lying or we haven't received the POL yet 1211 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1212 1213 } 1214 1215 // Create the next block to propose and return it. Returns nil block upon error. 1216 // 1217 // We really only need to return the parts, but the block is returned for 1218 // convenience so we can log the proposal block. 1219 // 1220 // NOTE: keep it side-effect free for clarity. 1221 // CONTRACT: cs.privValidator is not nil. 1222 func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { 1223 if cs.privValidator == nil { 1224 panic("entered createProposalBlock with privValidator being nil") 1225 } 1226 1227 var commit *types.Commit 1228 switch { 1229 case cs.Height == cs.state.InitialHeight: 1230 // We're creating a proposal for the first block. 1231 // The commit is empty, but not nil. 1232 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1233 1234 case cs.LastCommit.HasTwoThirdsMajority(): 1235 // Make the commit from LastCommit 1236 commit = cs.LastCommit.MakeCommit() 1237 1238 default: // This shouldn't happen. 1239 cs.Logger.Error("propose step; cannot propose anything without commit for the previous block") 1240 return 1241 } 1242 1243 if cs.privValidatorPubKey == nil { 1244 // If this node is a validator & proposer in the current round, it will 1245 // miss the opportunity to create a block. 1246 cs.Logger.Error("propose step; empty priv validator public key", "err", errPubKeyIsNotSet) 1247 return 1248 } 1249 1250 proposerAddr := cs.privValidatorPubKey.Address() 1251 1252 return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1253 } 1254 1255 // Enter: `timeoutPropose` after entering Propose. 1256 // Enter: proposal block and POL is ready. 
1257 // Prevote for LockedBlock if we're locked, or ProposalBlock if valid. 1258 // Otherwise vote nil. 1259 func (cs *State) enterPrevote(height int64, round int32) { 1260 logger := cs.Logger.With("height", height, "round", round) 1261 1262 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 1263 logger.Debug( 1264 "entering prevote step with invalid args", 1265 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1266 ) 1267 return 1268 } 1269 1270 defer func() { 1271 // Done enterPrevote: 1272 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 1273 cs.newStep() 1274 }() 1275 1276 logger.Debug("entering prevote step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1277 1278 // Sign and broadcast vote as necessary 1279 cs.doPrevote(height, round) 1280 1281 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 1282 // (so we have more time to try and collect +2/3 prevotes for a single block) 1283 } 1284 1285 func (cs *State) defaultDoPrevote(height int64, round int32) { 1286 logger := cs.Logger.With("height", height, "round", round) 1287 1288 // If a block is locked, prevote that. 1289 if cs.LockedBlock != nil { 1290 logger.Debug("prevote step; already locked on a block; prevoting locked block") 1291 cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) 1292 return 1293 } 1294 1295 // If ProposalBlock is nil, prevote nil. 1296 if cs.ProposalBlock == nil { 1297 logger.Debug("prevote step: ProposalBlock is nil") 1298 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1299 return 1300 } 1301 1302 // Validate proposal block 1303 err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock) 1304 if err != nil { 1305 // ProposalBlock is invalid, prevote nil. 
1306 logger.Error("prevote step: ProposalBlock is invalid", "err", err) 1307 cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) 1308 return 1309 } 1310 1311 // Prevote cs.ProposalBlock 1312 // NOTE: the proposal signature is validated when it is received, 1313 // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) 1314 logger.Debug("prevote step: ProposalBlock is valid") 1315 cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) 1316 } 1317 1318 // Enter: any +2/3 prevotes at next round. 1319 func (cs *State) enterPrevoteWait(height int64, round int32) { 1320 logger := cs.Logger.With("height", height, "round", round) 1321 1322 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1323 logger.Debug( 1324 "entering prevote wait step with invalid args", 1325 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1326 ) 1327 return 1328 } 1329 1330 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1331 panic(fmt.Sprintf( 1332 "entering prevote wait step (%v/%v), but prevotes does not have any +2/3 votes", 1333 height, round, 1334 )) 1335 } 1336 1337 logger.Debug("entering prevote wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1338 1339 defer func() { 1340 // Done enterPrevoteWait: 1341 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1342 cs.newStep() 1343 }() 1344 1345 // Wait for some more prevotes; enterPrecommit 1346 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1347 } 1348 1349 // Enter: `timeoutPrevote` after any +2/3 prevotes. 1350 // Enter: `timeoutPrecommit` after any +2/3 precommits. 1351 // Enter: +2/3 precomits for block or nil. 
1352 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 1353 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 1354 // else, precommit nil otherwise. 1355 func (cs *State) enterPrecommit(height int64, round int32) { 1356 logger := cs.Logger.With("height", height, "round", round) 1357 1358 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { 1359 logger.Debug( 1360 "entering precommit step with invalid args", 1361 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1362 ) 1363 return 1364 } 1365 1366 logger.Debug("entering precommit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1367 1368 defer func() { 1369 // Done enterPrecommit: 1370 cs.updateRoundStep(round, cstypes.RoundStepPrecommit) 1371 cs.newStep() 1372 }() 1373 1374 // check for a polka 1375 blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() 1376 1377 // If we don't have a polka, we must precommit nil. 1378 if !ok { 1379 if cs.LockedBlock != nil { 1380 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit while we are locked; precommitting nil") 1381 } else { 1382 logger.Debug("precommit step; no +2/3 prevotes during enterPrecommit; precommitting nil") 1383 } 1384 1385 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1386 return 1387 } 1388 1389 // At this point +2/3 prevoted for a particular block or nil. 1390 if err := cs.eventBus.PublishEventPolka(cs.RoundStateEvent()); err != nil { 1391 logger.Error("failed publishing polka", "err", err) 1392 } 1393 1394 // the latest POLRound should be this round. 1395 polRound, _ := cs.Votes.POLInfo() 1396 if polRound < round { 1397 panic(fmt.Sprintf("this POLRound should be %v but got %v", round, polRound)) 1398 } 1399 1400 // +2/3 prevoted nil. Unlock and precommit nil. 
1401 if len(blockID.Hash) == 0 { 1402 if cs.LockedBlock == nil { 1403 logger.Debug("precommit step; +2/3 prevoted for nil") 1404 } else { 1405 logger.Debug("precommit step; +2/3 prevoted for nil; unlocking") 1406 cs.LockedRound = -1 1407 cs.LockedBlock = nil 1408 cs.LockedBlockParts = nil 1409 1410 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1411 logger.Error("failed publishing event unlock", "err", err) 1412 } 1413 } 1414 1415 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1416 return 1417 } 1418 1419 // At this point, +2/3 prevoted for a particular block. 1420 1421 // If we're already locked on that block, precommit it, and update the LockedRound 1422 if cs.LockedBlock.HashesTo(blockID.Hash) { 1423 logger.Debug("precommit step; +2/3 prevoted locked block; relocking") 1424 cs.LockedRound = round 1425 1426 if err := cs.eventBus.PublishEventRelock(cs.RoundStateEvent()); err != nil { 1427 logger.Error("failed publishing event relock", "err", err) 1428 } 1429 1430 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1431 return 1432 } 1433 1434 // If +2/3 prevoted for proposal block, stage and precommit it 1435 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1436 logger.Debug("precommit step; +2/3 prevoted proposal block; locking", "hash", blockID.Hash) 1437 1438 // Validate the block. 
1439 if err := cs.blockExec.ValidateBlock(cs.state, cs.ProposalBlock); err != nil { 1440 panic(fmt.Sprintf("precommit step; +2/3 prevoted for an invalid block: %v", err)) 1441 } 1442 1443 cs.LockedRound = round 1444 cs.LockedBlock = cs.ProposalBlock 1445 cs.LockedBlockParts = cs.ProposalBlockParts 1446 1447 if err := cs.eventBus.PublishEventLock(cs.RoundStateEvent()); err != nil { 1448 logger.Error("failed publishing event lock", "err", err) 1449 } 1450 1451 cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) 1452 return 1453 } 1454 1455 // There was a polka in this round for a block we don't have. 1456 // Fetch that block, unlock, and precommit nil. 1457 // The +2/3 prevotes for this round is the POL for our unlock. 1458 logger.Debug("precommit step; +2/3 prevotes for a block we do not have; voting nil", "block_id", blockID) 1459 1460 cs.LockedRound = -1 1461 cs.LockedBlock = nil 1462 cs.LockedBlockParts = nil 1463 1464 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1465 cs.ProposalBlock = nil 1466 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1467 } 1468 1469 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 1470 logger.Error("failed publishing event unlock", "err", err) 1471 } 1472 1473 cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) 1474 } 1475 1476 // Enter: any +2/3 precommits for next round. 
1477 func (cs *State) enterPrecommitWait(height int64, round int32) { 1478 logger := cs.Logger.With("height", height, "round", round) 1479 1480 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1481 logger.Debug( 1482 "entering precommit wait step with invalid args", 1483 "triggered_timeout", cs.TriggeredTimeoutPrecommit, 1484 "current", fmt.Sprintf("%v/%v", cs.Height, cs.Round), 1485 ) 1486 return 1487 } 1488 1489 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1490 panic(fmt.Sprintf( 1491 "entering precommit wait step (%v/%v), but precommits does not have any +2/3 votes", 1492 height, round, 1493 )) 1494 } 1495 1496 logger.Debug("entering precommit wait step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1497 1498 defer func() { 1499 // Done enterPrecommitWait: 1500 cs.TriggeredTimeoutPrecommit = true 1501 cs.newStep() 1502 }() 1503 1504 // wait for some more precommits; enterNewRound 1505 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1506 } 1507 1508 // Enter: +2/3 precommits for block 1509 func (cs *State) enterCommit(height int64, commitRound int32) { 1510 logger := cs.Logger.With("height", height, "commit_round", commitRound) 1511 1512 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1513 logger.Debug( 1514 "entering commit step with invalid args", 1515 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1516 ) 1517 return 1518 } 1519 1520 logger.Debug("entering commit step", "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step)) 1521 1522 defer func() { 1523 // Done enterCommit: 1524 // keep cs.Round the same, commitRound points to the right Precommits set. 1525 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1526 cs.CommitRound = commitRound 1527 cs.CommitTime = tmtime.Now() 1528 cs.newStep() 1529 1530 // Maybe finalize immediately. 
1531 cs.tryFinalizeCommit(height) 1532 }() 1533 1534 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1535 if !ok { 1536 panic("RunActionCommit() expects +2/3 precommits") 1537 } 1538 1539 // The Locked* fields no longer matter. 1540 // Move them over to ProposalBlock if they match the commit hash, 1541 // otherwise they'll be cleared in updateToState. 1542 if cs.LockedBlock.HashesTo(blockID.Hash) { 1543 logger.Debug("commit is for a locked block; set ProposalBlock=LockedBlock", "block_hash", blockID.Hash) 1544 cs.ProposalBlock = cs.LockedBlock 1545 cs.ProposalBlockParts = cs.LockedBlockParts 1546 } 1547 1548 // If we don't have the block being committed, set up to get it. 1549 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1550 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1551 logger.Info( 1552 "commit is for a block we do not know about; set ProposalBlock=nil", 1553 "proposal", cs.ProposalBlock.Hash(), 1554 "commit", blockID.Hash, 1555 ) 1556 1557 // We're getting the wrong block. 1558 // Set up ProposalBlockParts and keep waiting. 1559 cs.ProposalBlock = nil 1560 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1561 1562 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1563 logger.Error("failed publishing valid block", "err", err) 1564 } 1565 1566 cs.evsw.FireEvent(types.EventValidBlockValue, &cs.RoundState) 1567 } 1568 } 1569 } 1570 1571 // If we have the block AND +2/3 commits for it, finalize. 
1572 func (cs *State) tryFinalizeCommit(height int64) { 1573 logger := cs.Logger.With("height", height) 1574 1575 if cs.Height != height { 1576 panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) 1577 } 1578 1579 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1580 if !ok || len(blockID.Hash) == 0 { 1581 logger.Error("failed attempt to finalize commit; there was no +2/3 majority or +2/3 was for nil") 1582 return 1583 } 1584 1585 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1586 // TODO: this happens every time if we're not a validator (ugly logs) 1587 // TODO: ^^ wait, why does it matter that we're a validator? 1588 logger.Debug( 1589 "failed attempt to finalize commit; we do not have the commit block", 1590 "proposal_block", cs.ProposalBlock.Hash(), 1591 "commit_block", blockID.Hash, 1592 ) 1593 return 1594 } 1595 1596 cs.finalizeCommit(height) 1597 } 1598 1599 // Increment height and goto cstypes.RoundStepNewHeight 1600 func (cs *State) finalizeCommit(height int64) { 1601 logger := cs.Logger.With("height", height) 1602 1603 if cs.Height != height || cs.Step != cstypes.RoundStepCommit { 1604 logger.Debug( 1605 "entering finalize commit step", 1606 "current", fmt.Sprintf("%v/%v/%v", cs.Height, cs.Round, cs.Step), 1607 ) 1608 return 1609 } 1610 1611 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1612 block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts 1613 1614 if !ok { 1615 panic("cannot finalize commit; commit does not have 2/3 majority") 1616 } 1617 if !blockParts.HasHeader(blockID.PartSetHeader) { 1618 panic("expected ProposalBlockParts header to be commit header") 1619 } 1620 if !block.HashesTo(blockID.Hash) { 1621 panic("cannot finalize commit; proposal block does not hash to commit hash") 1622 } 1623 1624 if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { 1625 panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) 1626 } 1627 1628 logger.Info( 
1629 "finalizing commit of block", 1630 "hash", block.Hash(), 1631 "root", block.AppHash, 1632 "num_txs", len(block.Txs), 1633 ) 1634 logger.Debug(fmt.Sprintf("%v", block)) 1635 1636 fail.Fail() // XXX 1637 1638 // Save to blockStore. 1639 if cs.blockStore.Height() < block.Height { 1640 // NOTE: the seenCommit is local justification to commit this block, 1641 // but may differ from the LastCommit included in the next block 1642 precommits := cs.Votes.Precommits(cs.CommitRound) 1643 seenCommit := precommits.MakeCommit() 1644 cs.blockStore.SaveBlock(block, blockParts, seenCommit) 1645 } else { 1646 // Happens during replay if we already saved the block but didn't commit 1647 logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) 1648 } 1649 1650 fail.Fail() // XXX 1651 1652 // Write EndHeightMessage{} for this height, implying that the blockstore 1653 // has saved the block. 1654 // 1655 // If we crash before writing this EndHeightMessage{}, we will recover by 1656 // running ApplyBlock during the ABCI handshake when we restart. If we 1657 // didn't save the block to the blockstore before writing 1658 // EndHeightMessage{}, we'd have to change WAL replay -- currently it 1659 // complains about replaying for heights where an #ENDHEIGHT entry already 1660 // exists. 1661 // 1662 // Either way, the State should not be resumed until we 1663 // successfully call ApplyBlock (ie. later here, or in Handshake after 1664 // restart). 1665 endMsg := EndHeightMessage{height} 1666 if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync 1667 panic(fmt.Sprintf( 1668 "failed to write %v msg to consensus WAL due to %v; check your file system and restart the node", 1669 endMsg, err, 1670 )) 1671 } 1672 1673 fail.Fail() // XXX 1674 1675 // Create a copy of the state for staging and an event cache for txs. 1676 stateCopy := cs.state.Copy() 1677 1678 // Execute and commit the block, update and save the state, and update the mempool. 
1679 // NOTE The block.AppHash wont reflect these txs until the next block. 1680 stateCopy, err := cs.blockExec.ApplyBlock( 1681 stateCopy, 1682 types.BlockID{ 1683 Hash: block.Hash(), 1684 PartSetHeader: blockParts.Header(), 1685 }, 1686 block, 1687 ) 1688 if err != nil { 1689 logger.Error("failed to apply block", "err", err) 1690 return 1691 } 1692 1693 fail.Fail() // XXX 1694 1695 // must be called before we update state 1696 cs.RecordMetrics(height, block) 1697 1698 // NewHeightStep! 1699 cs.updateToState(stateCopy) 1700 1701 fail.Fail() // XXX 1702 1703 // Private validator might have changed it's key pair => refetch pubkey. 1704 if err := cs.updatePrivValidatorPubKey(); err != nil { 1705 logger.Error("failed to get private validator pubkey", "err", err) 1706 } 1707 1708 // cs.StartTime is already set. 1709 // Schedule Round0 to start soon. 1710 cs.scheduleRound0(&cs.RoundState) 1711 1712 // By here, 1713 // * cs.Height has been increment to height+1 1714 // * cs.Step is now cstypes.RoundStepNewHeight 1715 // * cs.StartTime is set to when we will start round0. 1716 } 1717 1718 func (cs *State) RecordMetrics(height int64, block *types.Block) { 1719 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1720 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1721 1722 var ( 1723 missingValidators int 1724 missingValidatorsPower int64 1725 ) 1726 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1727 // Remember that the first LastCommit is intentionally empty, so it's not 1728 // fair to increment missing validators number. 1729 if height > cs.state.InitialHeight { 1730 // Sanity check that commit size matches validator set size - only applies 1731 // after first block. 
1732 var ( 1733 commitSize = block.LastCommit.Size() 1734 valSetLen = len(cs.LastValidators.Validators) 1735 address types.Address 1736 ) 1737 if commitSize != valSetLen { 1738 cs.Logger.Error(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1739 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1740 return 1741 } 1742 1743 if cs.privValidator != nil { 1744 if cs.privValidatorPubKey == nil { 1745 // Metrics won't be updated, but it's not critical. 1746 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1747 } else { 1748 address = cs.privValidatorPubKey.Address() 1749 } 1750 } 1751 1752 for i, val := range cs.LastValidators.Validators { 1753 commitSig := block.LastCommit.Signatures[i] 1754 if commitSig.Absent() { 1755 missingValidators++ 1756 missingValidatorsPower += val.VotingPower 1757 } 1758 1759 if bytes.Equal(val.Address, address) { 1760 label := []string{ 1761 "validator_address", val.Address.String(), 1762 } 1763 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1764 if commitSig.ForBlock() { 1765 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1766 } else { 1767 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1768 } 1769 } 1770 1771 } 1772 } 1773 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1774 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1775 1776 // NOTE: byzantine validators power and count is only for consensus evidence i.e. 
duplicate vote 1777 var ( 1778 byzantineValidatorsPower int64 1779 byzantineValidatorsCount int64 1780 ) 1781 1782 for _, ev := range block.Evidence.Evidence { 1783 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1784 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1785 byzantineValidatorsCount++ 1786 byzantineValidatorsPower += val.VotingPower 1787 } 1788 } 1789 } 1790 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1791 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1792 1793 if height > 1 { 1794 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1795 if lastBlockMeta != nil { 1796 cs.metrics.BlockIntervalSeconds.Observe( 1797 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1798 ) 1799 } 1800 } 1801 1802 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1803 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1804 cs.metrics.BlockSizeBytes.Observe(float64(block.Size())) 1805 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1806 } 1807 1808 //----------------------------------------------------------------------------- 1809 1810 func (cs *State) defaultSetProposal(proposal *types.Proposal) error { 1811 // Already have one 1812 // TODO: possibly catch double proposals 1813 if cs.Proposal != nil { 1814 return nil 1815 } 1816 1817 // Does not apply 1818 if proposal.Height != cs.Height || proposal.Round != cs.Round { 1819 return nil 1820 } 1821 1822 // Verify POLRound, which must be -1 or in range [0, proposal.Round). 
1823 if proposal.POLRound < -1 || 1824 (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { 1825 return ErrInvalidProposalPOLRound 1826 } 1827 1828 p := proposal.ToProto() 1829 // Verify signature 1830 if !cs.Validators.GetProposer().PubKey.VerifySignature( 1831 types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature, 1832 ) { 1833 return ErrInvalidProposalSignature 1834 } 1835 1836 proposal.Signature = p.Signature 1837 cs.Proposal = proposal 1838 // We don't update cs.ProposalBlockParts if it is already set. 1839 // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. 1840 // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! 1841 if cs.ProposalBlockParts == nil { 1842 cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) 1843 } 1844 1845 cs.Logger.Info("received proposal", "proposal", proposal) 1846 return nil 1847 } 1848 1849 // NOTE: block is not necessarily valid. 1850 // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, 1851 // once we have the full block. 1852 func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID types.NodeID) (added bool, err error) { 1853 height, round, part := msg.Height, msg.Round, msg.Part 1854 1855 // Blocks might be reused, so round mismatch is OK 1856 if cs.Height != height { 1857 cs.Logger.Debug("received block part from wrong height", "height", height, "round", round) 1858 return false, nil 1859 } 1860 1861 // We're not expecting a block part. 1862 if cs.ProposalBlockParts == nil { 1863 // NOTE: this can happen when we've gone to a higher round and 1864 // then receive parts from the previous round - not necessarily a bad peer. 
1865 cs.Logger.Debug( 1866 "received a block part when we are not expecting any", 1867 "height", height, 1868 "round", round, 1869 "index", part.Index, 1870 "peer", peerID, 1871 ) 1872 return false, nil 1873 } 1874 1875 added, err = cs.ProposalBlockParts.AddPart(part) 1876 if err != nil { 1877 return added, err 1878 } 1879 if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { 1880 return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", 1881 cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, 1882 ) 1883 } 1884 if added && cs.ProposalBlockParts.IsComplete() { 1885 bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) 1886 if err != nil { 1887 return added, err 1888 } 1889 1890 var pbb = new(tmproto.Block) 1891 err = proto.Unmarshal(bz, pbb) 1892 if err != nil { 1893 return added, err 1894 } 1895 1896 block, err := types.BlockFromProto(pbb) 1897 if err != nil { 1898 return added, err 1899 } 1900 1901 cs.ProposalBlock = block 1902 1903 // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal 1904 cs.Logger.Info("received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) 1905 1906 if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { 1907 cs.Logger.Error("failed publishing event complete proposal", "err", err) 1908 } 1909 1910 // Update Valid* if we can. 
1911 prevotes := cs.Votes.Prevotes(cs.Round) 1912 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 1913 if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { 1914 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1915 cs.Logger.Debug( 1916 "updating valid block to new proposal block", 1917 "valid_round", cs.Round, 1918 "valid_block_hash", cs.ProposalBlock.Hash(), 1919 ) 1920 1921 cs.ValidRound = cs.Round 1922 cs.ValidBlock = cs.ProposalBlock 1923 cs.ValidBlockParts = cs.ProposalBlockParts 1924 } 1925 // TODO: In case there is +2/3 majority in Prevotes set for some 1926 // block and cs.ProposalBlock contains different block, either 1927 // proposer is faulty or voting power of faulty processes is more 1928 // than 1/3. We should trigger in the future accountability 1929 // procedure at this point. 1930 } 1931 1932 if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { 1933 // Move onto the next step 1934 cs.enterPrevote(height, cs.Round) 1935 if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added 1936 cs.enterPrecommit(height, cs.Round) 1937 } 1938 } else if cs.Step == cstypes.RoundStepCommit { 1939 // If we're waiting on the proposal block... 1940 cs.tryFinalizeCommit(height) 1941 } 1942 1943 return added, nil 1944 } 1945 1946 return added, nil 1947 } 1948 1949 // Attempt to add the vote. if its a duplicate signature, dupeout the validator 1950 func (cs *State) tryAddVote(vote *types.Vote, peerID types.NodeID) (bool, error) { 1951 added, err := cs.addVote(vote, peerID) 1952 if err != nil { 1953 // If the vote height is off, we'll just ignore it, 1954 // But if it's a conflicting sig, add it to the cs.evpool. 1955 // If it's otherwise invalid, punish peer. 
1956 // nolint: gocritic 1957 if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { 1958 if cs.privValidatorPubKey == nil { 1959 return false, errPubKeyIsNotSet 1960 } 1961 1962 if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { 1963 cs.Logger.Error( 1964 "found conflicting vote from ourselves; did you unsafe_reset a validator?", 1965 "height", vote.Height, 1966 "round", vote.Round, 1967 "type", vote.Type, 1968 ) 1969 1970 return added, err 1971 } 1972 1973 // report conflicting votes to the evidence pool 1974 cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) 1975 cs.Logger.Debug( 1976 "found and sent conflicting votes to the evidence pool", 1977 "vote_a", voteErr.VoteA, 1978 "vote_b", voteErr.VoteB, 1979 ) 1980 1981 return added, err 1982 } else if errors.Is(err, types.ErrVoteNonDeterministicSignature) { 1983 cs.Logger.Debug("vote has non-deterministic signature", "err", err) 1984 } else { 1985 // Either 1986 // 1) bad peer OR 1987 // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR 1988 // 3) tmkms use with multiple validators connecting to a single tmkms instance 1989 // (https://bitbucket.org/number571/tendermint/issues/3839). 1990 cs.Logger.Info("failed attempting to add vote", "err", err) 1991 return added, ErrAddingVote 1992 } 1993 } 1994 1995 return added, nil 1996 } 1997 1998 func (cs *State) addVote(vote *types.Vote, peerID types.NodeID) (added bool, err error) { 1999 cs.Logger.Debug( 2000 "adding vote", 2001 "vote_height", vote.Height, 2002 "vote_type", vote.Type, 2003 "val_index", vote.ValidatorIndex, 2004 "cs_height", cs.Height, 2005 ) 2006 2007 // A precommit for the previous height? 
2008 // These come in while we wait timeoutCommit 2009 if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { 2010 if cs.Step != cstypes.RoundStepNewHeight { 2011 // Late precommit at prior height is ignored 2012 cs.Logger.Debug("precommit vote came in after commit timeout and has been ignored", "vote", vote) 2013 return 2014 } 2015 2016 added, err = cs.LastCommit.AddVote(vote) 2017 if !added { 2018 return 2019 } 2020 2021 cs.Logger.Debug("added vote to last precommits", "last_commit", cs.LastCommit.StringShort()) 2022 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2023 return added, err 2024 } 2025 2026 cs.evsw.FireEvent(types.EventVoteValue, vote) 2027 2028 // if we can skip timeoutCommit and have all the votes now, 2029 if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { 2030 // go straight to new round (skip timeout commit) 2031 // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) 2032 cs.enterNewRound(cs.Height, 0) 2033 } 2034 2035 return 2036 } 2037 2038 // Height mismatch is ignored. 2039 // Not necessarily a bad peer, but not favorable behavior. 
2040 if vote.Height != cs.Height { 2041 cs.Logger.Debug("vote ignored and not added", "vote_height", vote.Height, "cs_height", cs.Height, "peer", peerID) 2042 return 2043 } 2044 2045 height := cs.Height 2046 added, err = cs.Votes.AddVote(vote, peerID) 2047 if !added { 2048 // Either duplicate, or error upon cs.Votes.AddByIndex() 2049 return 2050 } 2051 2052 if err := cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}); err != nil { 2053 return added, err 2054 } 2055 cs.evsw.FireEvent(types.EventVoteValue, vote) 2056 2057 switch vote.Type { 2058 case tmproto.PrevoteType: 2059 prevotes := cs.Votes.Prevotes(vote.Round) 2060 cs.Logger.Debug("added vote to prevote", "vote", vote, "prevotes", prevotes.StringShort()) 2061 2062 // If +2/3 prevotes for a block or nil for *any* round: 2063 if blockID, ok := prevotes.TwoThirdsMajority(); ok { 2064 // There was a polka! 2065 // If we're locked but this is a recent polka, unlock. 2066 // If it matches our ProposalBlock, update the ValidBlock 2067 2068 // Unlock if `cs.LockedRound < vote.Round <= cs.Round` 2069 // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round 2070 if (cs.LockedBlock != nil) && 2071 (cs.LockedRound < vote.Round) && 2072 (vote.Round <= cs.Round) && 2073 !cs.LockedBlock.HashesTo(blockID.Hash) { 2074 2075 cs.Logger.Debug("unlocking because of POL", "locked_round", cs.LockedRound, "pol_round", vote.Round) 2076 2077 cs.LockedRound = -1 2078 cs.LockedBlock = nil 2079 cs.LockedBlockParts = nil 2080 2081 if err := cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()); err != nil { 2082 return added, err 2083 } 2084 } 2085 2086 // Update Valid* if we can. 2087 // NOTE: our proposal block may be nil or not what received a polka.. 
2088 if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { 2089 if cs.ProposalBlock.HashesTo(blockID.Hash) { 2090 cs.Logger.Debug("updating valid block because of POL", "valid_round", cs.ValidRound, "pol_round", vote.Round) 2091 cs.ValidRound = vote.Round 2092 cs.ValidBlock = cs.ProposalBlock 2093 cs.ValidBlockParts = cs.ProposalBlockParts 2094 } else { 2095 cs.Logger.Debug( 2096 "valid block we do not know about; set ProposalBlock=nil", 2097 "proposal", cs.ProposalBlock.Hash(), 2098 "block_id", blockID.Hash, 2099 ) 2100 2101 // we're getting the wrong block 2102 cs.ProposalBlock = nil 2103 } 2104 2105 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 2106 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 2107 } 2108 2109 cs.evsw.FireEvent(types.EventValidBlockValue, &cs.RoundState) 2110 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 2111 return added, err 2112 } 2113 } 2114 } 2115 2116 // If +2/3 prevotes for *anything* for future round: 2117 switch { 2118 case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): 2119 // Round-skip if there is any 2/3+ of votes ahead of us 2120 cs.enterNewRound(height, vote.Round) 2121 2122 case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round 2123 blockID, ok := prevotes.TwoThirdsMajority() 2124 if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { 2125 cs.enterPrecommit(height, vote.Round) 2126 } else if prevotes.HasTwoThirdsAny() { 2127 cs.enterPrevoteWait(height, vote.Round) 2128 } 2129 2130 case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: 2131 // If the proposal is now complete, enter prevote of cs.Round. 
2132 if cs.isProposalComplete() { 2133 cs.enterPrevote(height, cs.Round) 2134 } 2135 } 2136 2137 case tmproto.PrecommitType: 2138 precommits := cs.Votes.Precommits(vote.Round) 2139 cs.Logger.Debug("added vote to precommit", 2140 "height", vote.Height, 2141 "round", vote.Round, 2142 "validator", vote.ValidatorAddress.String(), 2143 "vote_timestamp", vote.Timestamp, 2144 "data", precommits.LogString()) 2145 2146 blockID, ok := precommits.TwoThirdsMajority() 2147 if ok { 2148 // Executed as TwoThirdsMajority could be from a higher round 2149 cs.enterNewRound(height, vote.Round) 2150 cs.enterPrecommit(height, vote.Round) 2151 2152 if len(blockID.Hash) != 0 { 2153 cs.enterCommit(height, vote.Round) 2154 if cs.config.SkipTimeoutCommit && precommits.HasAll() { 2155 cs.enterNewRound(cs.Height, 0) 2156 } 2157 } else { 2158 cs.enterPrecommitWait(height, vote.Round) 2159 } 2160 } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { 2161 cs.enterNewRound(height, vote.Round) 2162 cs.enterPrecommitWait(height, vote.Round) 2163 } 2164 2165 default: 2166 panic(fmt.Sprintf("unexpected vote type %v", vote.Type)) 2167 } 2168 2169 return added, err 2170 } 2171 2172 // CONTRACT: cs.privValidator is not nil. 2173 func (cs *State) signVote( 2174 msgType tmproto.SignedMsgType, 2175 hash []byte, 2176 header types.PartSetHeader, 2177 ) (*types.Vote, error) { 2178 // Flush the WAL. Otherwise, we may not recompute the same vote to sign, 2179 // and the privValidator will refuse to sign anything. 
2180 if err := cs.wal.FlushAndSync(); err != nil { 2181 return nil, err 2182 } 2183 2184 if cs.privValidatorPubKey == nil { 2185 return nil, errPubKeyIsNotSet 2186 } 2187 2188 addr := cs.privValidatorPubKey.Address() 2189 valIdx, _ := cs.Validators.GetByAddress(addr) 2190 2191 vote := &types.Vote{ 2192 ValidatorAddress: addr, 2193 ValidatorIndex: valIdx, 2194 Height: cs.Height, 2195 Round: cs.Round, 2196 Timestamp: cs.voteTime(), 2197 Type: msgType, 2198 BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, 2199 } 2200 2201 v := vote.ToProto() 2202 2203 // If the signedMessageType is for precommit, 2204 // use our local precommit Timeout as the max wait time for getting a singed commit. The same goes for prevote. 2205 var timeout time.Duration 2206 2207 switch msgType { 2208 case tmproto.PrecommitType: 2209 timeout = cs.config.TimeoutPrecommit 2210 case tmproto.PrevoteType: 2211 timeout = cs.config.TimeoutPrevote 2212 default: 2213 timeout = time.Second 2214 } 2215 2216 ctx, cancel := context.WithTimeout(context.TODO(), timeout) 2217 defer cancel() 2218 2219 err := cs.privValidator.SignVote(ctx, cs.state.ChainID, v) 2220 vote.Signature = v.Signature 2221 2222 return vote, err 2223 } 2224 2225 // voteTime ensures monotonicity of the time a validator votes on. 2226 // It ensures that for a prior block with a BFT-timestamp of T, 2227 // any vote from this validator will have time at least time T + 1ms. 2228 // This is needed, as monotonicity of time is a guarantee that BFT time provides. 2229 func (cs *State) voteTime() time.Time { 2230 now := tmtime.Now() 2231 minVoteTime := now 2232 // Minimum time increment between blocks 2233 const timeIota = time.Millisecond 2234 // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, 2235 // even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/. 
2236 if cs.LockedBlock != nil { 2237 // See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html 2238 minVoteTime = cs.LockedBlock.Time.Add(timeIota) 2239 } else if cs.ProposalBlock != nil { 2240 minVoteTime = cs.ProposalBlock.Time.Add(timeIota) 2241 } 2242 2243 if now.After(minVoteTime) { 2244 return now 2245 } 2246 return minVoteTime 2247 } 2248 2249 // sign the vote and publish on internalMsgQueue 2250 func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { 2251 if cs.privValidator == nil { // the node does not have a key 2252 return nil 2253 } 2254 2255 if cs.privValidatorPubKey == nil { 2256 // Vote won't be signed, but it's not critical. 2257 cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) 2258 return nil 2259 } 2260 2261 // If the node not in the validator set, do nothing. 2262 if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { 2263 return nil 2264 } 2265 2266 // TODO: pass pubKey to signVote 2267 vote, err := cs.signVote(msgType, hash, header) 2268 if err == nil { 2269 cs.sendInternalMessage(msgInfo{&VoteMessage{vote}, ""}) 2270 cs.Logger.Debug("signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) 2271 return vote 2272 } 2273 2274 cs.Logger.Error("failed signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) 2275 return nil 2276 } 2277 2278 // updatePrivValidatorPubKey get's the private validator public key and 2279 // memoizes it. This func returns an error if the private validator is not 2280 // responding or responds with an error. 
2281 func (cs *State) updatePrivValidatorPubKey() error { 2282 if cs.privValidator == nil { 2283 return nil 2284 } 2285 2286 var timeout time.Duration 2287 if cs.config.TimeoutPrecommit > cs.config.TimeoutPrevote { 2288 timeout = cs.config.TimeoutPrecommit 2289 } else { 2290 timeout = cs.config.TimeoutPrevote 2291 } 2292 2293 // no GetPubKey retry beyond the proposal/voting in RetrySignerClient 2294 if cs.Step >= cstypes.RoundStepPrecommit && cs.privValidatorType == types.RetrySignerClient { 2295 timeout = 0 2296 } 2297 2298 // set context timeout depending on the configuration and the State step, 2299 // this helps in avoiding blocking of the remote signer connection. 2300 ctx, cancel := context.WithTimeout(context.TODO(), timeout) 2301 defer cancel() 2302 pubKey, err := cs.privValidator.GetPubKey(ctx) 2303 if err != nil { 2304 return err 2305 } 2306 cs.privValidatorPubKey = pubKey 2307 return nil 2308 } 2309 2310 // look back to check existence of the node's consensus votes before joining consensus 2311 func (cs *State) checkDoubleSigningRisk(height int64) error { 2312 if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { 2313 valAddr := cs.privValidatorPubKey.Address() 2314 doubleSignCheckHeight := cs.config.DoubleSignCheckHeight 2315 if doubleSignCheckHeight > height { 2316 doubleSignCheckHeight = height 2317 } 2318 2319 for i := int64(1); i < doubleSignCheckHeight; i++ { 2320 lastCommit := cs.blockStore.LoadSeenCommit(height - i) 2321 if lastCommit != nil { 2322 for sigIdx, s := range lastCommit.Signatures { 2323 if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { 2324 cs.Logger.Info("found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i) 2325 return ErrSignatureFoundInPastBlocks 2326 } 2327 } 2328 } 2329 } 2330 } 2331 2332 return nil 2333 } 2334 2335 //--------------------------------------------------------- 2336 2337 func 
CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 2338 if h1 < h2 { 2339 return -1 2340 } else if h1 > h2 { 2341 return 1 2342 } 2343 if r1 < r2 { 2344 return -1 2345 } else if r1 > r2 { 2346 return 1 2347 } 2348 if s1 < s2 { 2349 return -1 2350 } else if s1 > s2 { 2351 return 1 2352 } 2353 return 0 2354 } 2355 2356 // repairWalFile decodes messages from src (until the decoder errors) and 2357 // writes them to dst. 2358 func repairWalFile(src, dst string) error { 2359 in, err := os.Open(src) 2360 if err != nil { 2361 return err 2362 } 2363 defer in.Close() 2364 2365 out, err := os.Create(dst) 2366 if err != nil { 2367 return err 2368 } 2369 defer out.Close() 2370 2371 var ( 2372 dec = NewWALDecoder(in) 2373 enc = NewWALEncoder(out) 2374 ) 2375 2376 // best-case repair (until first error is encountered) 2377 for { 2378 msg, err := dec.Decode() 2379 if err != nil { 2380 break 2381 } 2382 2383 err = enc.Encode(msg) 2384 if err != nil { 2385 return fmt.Errorf("failed to encode msg: %w", err) 2386 } 2387 } 2388 2389 return nil 2390 }