// github.com/vipernet-xyz/tm@v0.34.24/test/maverick/consensus/state.go (about)
package consensus

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"reflect"
	"runtime/debug"
	"sync"
	"time"

	"github.com/gogo/protobuf/proto"

	cfg "github.com/vipernet-xyz/tm/config"
	tmcon "github.com/vipernet-xyz/tm/consensus"
	cstypes "github.com/vipernet-xyz/tm/consensus/types"
	"github.com/vipernet-xyz/tm/crypto"
	tmevents "github.com/vipernet-xyz/tm/libs/events"
	"github.com/vipernet-xyz/tm/libs/fail"
	tmjson "github.com/vipernet-xyz/tm/libs/json"
	"github.com/vipernet-xyz/tm/libs/log"
	tmmath "github.com/vipernet-xyz/tm/libs/math"
	tmos "github.com/vipernet-xyz/tm/libs/os"
	"github.com/vipernet-xyz/tm/libs/service"
	"github.com/vipernet-xyz/tm/p2p"
	tmproto "github.com/vipernet-xyz/tm/proto/tendermint/types"
	sm "github.com/vipernet-xyz/tm/state"
	"github.com/vipernet-xyz/tm/types"
	tmtime "github.com/vipernet-xyz/tm/types/time"
)

// State handles execution of the consensus algorithm.
// It processes votes and proposals, and upon reaching agreement,
// commits blocks to the chain and executes them against the application.
// The internal state machine receives input from peers, the internal validator, and from a timer.
//
// This is the "maverick" variant: at every step it first looks up
// misbehaviors[cs.Height] and, when an entry exists, delegates the step to that
// Misbehavior instead of the default handler.
type State struct {
	service.BaseService

	// config details
	config        *cfg.ConsensusConfig
	privValidator types.PrivValidator // for signing votes

	// store blocks and commits
	blockStore sm.BlockStore

	// create and execute blocks
	blockExec *sm.BlockExecutor

	// notify us if txs are available
	txNotifier txNotifier

	// add evidence to the pool
	// when it's detected
	evpool evidencePool

	// internal state
	// mtx is taken by the exported accessors and by the message/timeout
	// handlers before they touch RoundState or state.
	mtx sync.RWMutex
	cstypes.RoundState
	state sm.State // State until height-1.

	// state changes may be triggered by: msgs from peers,
	// msgs from ourself, or by timeouts
	peerMsgQueue     chan msgInfo
	internalMsgQueue chan msgInfo
	timeoutTicker    TimeoutTicker
	// privValidator pubkey, memoized for the duration of one block
	// to avoid extra requests to HSM
	privValidatorPubKey crypto.PubKey

	// information about added votes and block parts is written on this channel
	// so statistics can be computed by reactor
	statsMsgQueue chan msgInfo

	// we use eventBus to trigger msg broadcasts in the reactor,
	// and to notify external subscribers, eg. through a websocket
	eventBus *types.EventBus

	// a Write-Ahead Log ensures we can recover from any kind of crash
	// and helps us avoid signing conflicting votes
	wal          tmcon.WAL
	replayMode   bool // so we don't log signing errors during replay
	doWALCatchup bool // determines if we even try to do the catchup

	// for tests where we want to limit the number of transitions the state makes
	nSteps int

	// some functions can be overwritten for testing
	decideProposal func(height int64, round int32)

	// closed when we finish shutting down
	done chan struct{}

	// synchronous pubsub between consensus state and reactor.
	// state only emits EventNewRoundStep and EventVote
	evsw tmevents.EventSwitch

	// for reporting metrics
	metrics *tmcon.Metrics

	// misbehaviors mapped for each height (can't have more than one misbehavior per height)
	misbehaviors map[int64]Misbehavior

	// the switch is passed to the state so that maverick misbehaviors can directly control which
	// information they send to which nodes
	sw *p2p.Switch
}

// StateOption sets an optional parameter on the State.
// Options are applied by NewState after all defaults have been set.
type StateOption func(*State)

// NewState returns a new State.
// It wires the supplied dependencies into a State with default queues, a
// no-op WAL (nilWAL) and no-op metrics, reconstructs LastCommit when the chain
// already has blocks, and moves the round state to the height following
// state.LastBlockHeight. The misbehaviors map selects, per height, which
// Misbehavior replaces the default step handlers. Options run last so they can
// override any default.
func NewState(
	config *cfg.ConsensusConfig,
	state sm.State,
	blockExec *sm.BlockExecutor,
	blockStore sm.BlockStore,
	txNotifier txNotifier,
	evpool evidencePool,
	misbehaviors map[int64]Misbehavior,
	options ...StateOption,
) *State {
	cs := &State{
		config:           config,
		blockExec:        blockExec,
		blockStore:       blockStore,
		txNotifier:       txNotifier,
		peerMsgQueue:     make(chan msgInfo, msgQueueSize),
		internalMsgQueue: make(chan msgInfo, msgQueueSize),
		timeoutTicker:    NewTimeoutTicker(),
		statsMsgQueue:    make(chan msgInfo, msgQueueSize),
		done:             make(chan struct{}),
		doWALCatchup:     true,
		wal:              nilWAL{},
		evpool:           evpool,
		evsw:             tmevents.NewEventSwitch(),
		metrics:          tmcon.NopMetrics(),
		misbehaviors:     misbehaviors,
	}
	// set function defaults (may be overwritten before calling Start)
	cs.decideProposal = cs.defaultDecideProposal

	// We have no votes, so reconstruct LastCommit from SeenCommit.
	if state.LastBlockHeight > 0 {
		cs.reconstructLastCommit(state)
	}

	cs.updateToState(state)

	// Don't call scheduleRound0 yet.
	// We do that upon Start().

	cs.BaseService = *service.NewBaseService(nil, "State", cs)
	for _, option := range options {
		option(cs)
	}
	return cs
}

// I know this is not great but the maverick consensus state needs access to the peers
//
// SetSwitch stores the p2p switch on the State. The field is written without
// taking cs.mtx.
func (cs *State) SetSwitch(sw *p2p.Switch) {
	cs.sw = sw
}

// state transitions on complete-proposal, 2/3-any, 2/3-one
//
// handleMsg dispatches one queued message (proposal, block part or vote) while
// holding cs.mtx for writing. Proposals are routed through the misbehavior
// registered for the current height, if any. Errors are logged, not returned.
func (cs *State) handleMsg(mi msgInfo) {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	var (
		added bool
		err   error
	)
	msg, peerID := mi.Msg, mi.PeerID
	switch msg := msg.(type) {
	case *tmcon.ProposalMessage:
		// will not cause transition.
		// once proposal is set, we can receive block parts
		// err = cs.setProposal(msg.Proposal)
		if b, ok := cs.misbehaviors[cs.Height]; ok {
			err = b.ReceiveProposal(cs, msg.Proposal)
		} else {
			err = defaultReceiveProposal(cs, msg.Proposal)
		}
	case *tmcon.BlockPartMessage:
		// if the proposal is complete, we'll enterPrevote or tryFinalizeCommit
		added, err = cs.addProposalBlockPart(msg, peerID)
		if added {
			// forwarded so the reactor can compute per-peer statistics
			cs.statsMsgQueue <- mi
		}

		// A part for another round is expected traffic: log at debug level
		// and clear err so it is not reported as an error below.
		if err != nil && msg.Round != cs.Round {
			cs.Logger.Debug(
				"Received block part from wrong round",
				"height",
				cs.Height,
				"csRound",
				cs.Round,
				"blockRound",
				msg.Round)
			err = nil
		}
	case *tmcon.VoteMessage:
		// attempt to add the vote and dupeout the validator if its a duplicate signature
		// if the vote gives us a 2/3-any or 2/3-one, we transition
		added, err = cs.tryAddVote(msg.Vote, peerID)
		if added {
			cs.statsMsgQueue <- mi
		}

		// if err == ErrAddingVote {
		// TODO: punish peer
		// We probably don't want to stop the peer here. The vote does not
		// necessarily comes from a malicious peer but can be just broadcasted by
		// a typical peer.
		// https://github.com/vipernet-xyz/tm/issues/1281
		// }

		// NOTE: the vote is broadcast to peers by the reactor listening
		// for vote events

		// TODO: If rs.Height == vote.Height && rs.Round < vote.Round,
		// the peer is sending us CatchupCommit precommits.
		// We could make note of this and help filter in broadcastHasVoteMessage().
	default:
		cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg))
		return
	}

	if err != nil {
		cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round,
			"peer", peerID, "err", err, "msg", msg)
	}
}

// Enter (CreateEmptyBlocks): from enterNewRound(height,round)
// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ):
//
//	after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval
//
// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool
//
// enterPropose is a no-op when (height, round) is stale relative to the
// current round state or the Propose step was already reached. Otherwise it
// runs the propose step (misbehavior for this height, or the default) and then,
// via the deferred function, records RoundStepPropose and moves on to prevote
// if the proposal is already complete.
func (cs *State) enterPropose(height int64, round int32) {
	logger := cs.Logger.With("height", height, "round", round)

	if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) {
		logger.Debug("enter propose", "msg", log.NewLazySprintf(
			"enterPropose(%v/%v): Invalid args. Current step: %v/%v/%v",
			height,
			round,
			cs.Height,
			cs.Round,
			cs.Step))
		return
	}
	logger.Info("enter propose",
		"msg",
		log.NewLazySprintf("enterPropose(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step))

	// Deferred so the step is only advanced after the propose handler below
	// has finished.
	defer func() {
		// Done enterPropose:
		cs.updateRoundStep(round, cstypes.RoundStepPropose)
		cs.newStep()

		// If we have the whole proposal + POL, then goto Prevote now.
		// else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart),
		// or else after timeoutPropose
		if cs.isProposalComplete() {
			cs.enterPrevote(height, cs.Round)
		}
	}()

	if b, ok := cs.misbehaviors[cs.Height]; ok {
		b.EnterPropose(cs, height, round)
	} else {
		defaultEnterPropose(cs, height, round)
	}
}

// Enter: `timeoutPropose` after entering Propose.
// Enter: proposal block and POL is ready.
// Prevote for LockedBlock if we're locked, or ProposalBlock if valid.
// Otherwise vote nil.
284 func (cs *State) enterPrevote(height int64, round int32) { 285 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { 286 cs.Logger.Debug("enter prevote", "msg", log.NewLazySprintf( 287 "enterPrevote(%v/%v): Invalid args. Current step: %v/%v/%v", 288 height, 289 round, 290 cs.Height, 291 cs.Round, 292 cs.Step)) 293 return 294 } 295 296 defer func() { 297 // Done enterPrevote: 298 cs.updateRoundStep(round, cstypes.RoundStepPrevote) 299 cs.newStep() 300 }() 301 302 cs.Logger.Debug("enter prevote", 303 "msg", 304 log.NewLazySprintf("enterPrevote(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) 305 306 // Sign and broadcast vote as necessary 307 if b, ok := cs.misbehaviors[cs.Height]; ok { 308 b.EnterPrevote(cs, height, round) 309 } else { 310 defaultEnterPrevote(cs, height, round) 311 } 312 313 // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait 314 // (so we have more time to try and collect +2/3 prevotes for a single block) 315 } 316 317 // Enter: `timeoutPrevote` after any +2/3 prevotes. 318 // Enter: `timeoutPrecommit` after any +2/3 precommits. 319 // Enter: +2/3 precomits for block or nil. 320 // Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) 321 // else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, 322 // else, precommit nil otherwise. 323 func (cs *State) enterPrecommit(height int64, round int32) { 324 logger := cs.Logger.With("height", height, "round", round) 325 326 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { 327 logger.Debug("enter precommit", 328 "msg", 329 log.NewLazySprintf("enterPrecommit(%v/%v): Invalid args. Current step: %v/%v/%v", 330 height, 331 round, 332 cs.Height, 333 cs.Round, 334 cs.Step)) 335 return 336 } 337 338 logger.Info("enter precommit", 339 "msg", 340 log.NewLazySprintf("enterPrecommit(%v/%v). 
Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) 341 342 defer func() { 343 // Done enterPrecommit: 344 cs.updateRoundStep(round, cstypes.RoundStepPrecommit) 345 cs.newStep() 346 }() 347 348 if b, ok := cs.misbehaviors[cs.Height]; ok { 349 b.EnterPrecommit(cs, height, round) 350 } else { 351 defaultEnterPrecommit(cs, height, round) 352 } 353 } 354 355 func (cs *State) addVote( 356 vote *types.Vote, 357 peerID p2p.ID, 358 ) (added bool, err error) { 359 cs.Logger.Debug( 360 "addVote", 361 "voteHeight", 362 vote.Height, 363 "voteType", 364 vote.Type, 365 "valIndex", 366 vote.ValidatorIndex, 367 "csHeight", 368 cs.Height, 369 ) 370 371 // A precommit for the previous height? 372 // These come in while we wait timeoutCommit 373 if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { 374 if cs.Step != cstypes.RoundStepNewHeight { 375 // Late precommit at prior height is ignored 376 cs.Logger.Debug("Precommit vote came in after commit timeout and has been ignored", "vote", vote) 377 return 378 } 379 added, err = cs.LastCommit.AddVote(vote) 380 if !added { 381 return 382 } 383 384 cs.Logger.Info("add vote", 385 "msg", 386 log.NewLazySprintf("Added to lastPrecommits: %v", cs.LastCommit.StringShort())) 387 _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) 388 cs.evsw.FireEvent(types.EventVote, vote) 389 390 // if we can skip timeoutCommit and have all the votes now, 391 if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { 392 // go straight to new round (skip timeout commit) 393 // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) 394 cs.enterNewRound(cs.Height, 0) 395 } 396 397 return 398 } 399 400 // Height mismatch is ignored. 401 // Not necessarily a bad peer, but not favourable behaviour. 
402 if vote.Height != cs.Height { 403 cs.Logger.Debug("vote ignored and not added", "voteHeight", vote.Height, "csHeight", cs.Height, "peerID", peerID) 404 return 405 } 406 407 added, err = cs.Votes.AddVote(vote, peerID) 408 if !added { 409 // Either duplicate, or error upon cs.Votes.AddByIndex() 410 return 411 } 412 413 _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) 414 cs.evsw.FireEvent(types.EventVote, vote) 415 416 switch vote.Type { 417 case tmproto.PrevoteType: 418 if b, ok := cs.misbehaviors[cs.Height]; ok { 419 b.ReceivePrevote(cs, vote) 420 } else { 421 defaultReceivePrevote(cs, vote) 422 } 423 424 case tmproto.PrecommitType: 425 if b, ok := cs.misbehaviors[cs.Height]; ok { 426 b.ReceivePrecommit(cs, vote) 427 } 428 defaultReceivePrecommit(cs, vote) 429 430 default: 431 panic(fmt.Sprintf("Unexpected vote type %v", vote.Type)) 432 } 433 434 return added, err 435 } 436 437 //----------------------------------------------------------------------------- 438 // Errors 439 440 var ( 441 ErrInvalidProposalSignature = errors.New("error invalid proposal signature") 442 ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round") 443 ErrAddingVote = errors.New("error adding vote") 444 ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key") 445 446 errPubKeyIsNotSet = errors.New("pubkey is not set. 
Look for \"Can't get private validator pubkey\" errors") 447 ) 448 449 //----------------------------------------------------------------------------- 450 451 var msgQueueSize = 1000 452 453 // msgs from the reactor which may update the state 454 type msgInfo struct { 455 Msg tmcon.Message `json:"msg"` 456 PeerID p2p.ID `json:"peer_key"` 457 } 458 459 // internally generated messages which may update the state 460 type timeoutInfo struct { 461 Duration time.Duration `json:"duration"` 462 Height int64 `json:"height"` 463 Round int32 `json:"round"` 464 Step cstypes.RoundStepType `json:"step"` 465 } 466 467 func (ti *timeoutInfo) String() string { 468 return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step) 469 } 470 471 // interface to the mempool 472 type txNotifier interface { 473 TxsAvailable() <-chan struct{} 474 } 475 476 // interface to the evidence pool 477 type evidencePool interface { 478 // reports conflicting votes to the evidence pool to be processed into evidence 479 ReportConflictingVotes(voteA, voteB *types.Vote) 480 } 481 482 //---------------------------------------- 483 // Public interface 484 485 // SetLogger implements Service. 486 func (cs *State) SetLogger(l log.Logger) { 487 cs.BaseService.Logger = l 488 cs.timeoutTicker.SetLogger(l) 489 } 490 491 // SetEventBus sets event bus. 492 func (cs *State) SetEventBus(b *types.EventBus) { 493 cs.eventBus = b 494 cs.blockExec.SetEventBus(b) 495 } 496 497 // StateMetrics sets the metrics. 498 func StateMetrics(metrics *tmcon.Metrics) StateOption { 499 return func(cs *State) { cs.metrics = metrics } 500 } 501 502 // String returns a string. 503 func (cs *State) String() string { 504 // better not to access shared variables 505 return "ConsensusState" 506 } 507 508 // GetState returns a copy of the chain state. 
509 func (cs *State) GetState() sm.State { 510 cs.mtx.RLock() 511 defer cs.mtx.RUnlock() 512 return cs.state.Copy() 513 } 514 515 // GetLastHeight returns the last height committed. 516 // If there were no blocks, returns 0. 517 func (cs *State) GetLastHeight() int64 { 518 cs.mtx.RLock() 519 defer cs.mtx.RUnlock() 520 return cs.RoundState.Height - 1 521 } 522 523 // GetRoundState returns a shallow copy of the internal consensus state. 524 func (cs *State) GetRoundState() *cstypes.RoundState { 525 cs.mtx.RLock() 526 rs := cs.RoundState // copy 527 cs.mtx.RUnlock() 528 return &rs 529 } 530 531 // GetRoundStateJSON returns a json of RoundState. 532 func (cs *State) GetRoundStateJSON() ([]byte, error) { 533 cs.mtx.RLock() 534 defer cs.mtx.RUnlock() 535 return tmjson.Marshal(cs.RoundState) 536 } 537 538 // GetRoundStateSimpleJSON returns a json of RoundStateSimple 539 func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { 540 cs.mtx.RLock() 541 defer cs.mtx.RUnlock() 542 return tmjson.Marshal(cs.RoundState.RoundStateSimple()) 543 } 544 545 // GetValidators returns a copy of the current validators. 546 func (cs *State) GetValidators() (int64, []*types.Validator) { 547 cs.mtx.RLock() 548 defer cs.mtx.RUnlock() 549 return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators 550 } 551 552 // SetPrivValidator sets the private validator account for signing votes. It 553 // immediately requests pubkey and caches it. 554 func (cs *State) SetPrivValidator(priv types.PrivValidator) { 555 cs.mtx.Lock() 556 defer cs.mtx.Unlock() 557 558 cs.privValidator = priv 559 560 if err := cs.updatePrivValidatorPubKey(); err != nil { 561 cs.Logger.Error("Can't get private validator pubkey", "err", err) 562 } 563 } 564 565 // SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing. 
func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) {
	cs.mtx.Lock()
	cs.timeoutTicker = timeoutTicker
	cs.mtx.Unlock()
}

// LoadCommit loads the commit for a given height.
// For the block store's latest height it returns the locally seen commit;
// for any other height it returns the commit stored with the block.
func (cs *State) LoadCommit(height int64) *types.Commit {
	cs.mtx.RLock()
	defer cs.mtx.RUnlock()
	if height == cs.blockStore.Height() {
		return cs.blockStore.LoadSeenCommit(height)
	}
	return cs.blockStore.LoadBlockCommit(height)
}

// OnStart loads the latest state via the WAL, and starts the timeout and
// receive routines.
//
// When WAL catchup is enabled and replay reports a data-corruption error, the
// WAL file is backed up to "<walfile>.CORRUPTED", repaired in place, reloaded
// and replayed once more; a second corruption error is returned. Any other
// replay error is logged and startup continues.
func (cs *State) OnStart() error {
	// We may set the WAL in testing before calling Start, so only OpenWAL if its
	// still the nilWAL.
	if _, ok := cs.wal.(nilWAL); ok {
		if err := cs.loadWalFile(); err != nil {
			return err
		}
	}

	// We may have lost some votes if the process crashed reload from consensus
	// log to catchup.
	if cs.doWALCatchup {
		repairAttempted := false
	LOOP:
		for {
			err := cs.catchupReplay(cs.Height)
			switch {
			case err == nil:
				break LOOP
			case !IsDataCorruptionError(err):
				cs.Logger.Error("Error on catchup replay. Proceeding to start State anyway", "err", err)
				break LOOP
			case repairAttempted:
				// corruption persists after one repair: give up
				return err
			}

			cs.Logger.Info("WAL file is corrupted. Attempting repair", "err", err)

			// 1) prep work
			if err := cs.wal.Stop(); err != nil {
				return err
			}
			repairAttempted = true

			// 2) backup original WAL file
			corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile())
			if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil {
				return err
			}
			cs.Logger.Info("Backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile)

			// 3) try to repair (WAL file will be overwritten!)
			if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil {
				cs.Logger.Error("Repair failed", "err", err)
				return err
			}
			cs.Logger.Info("Successful repair")

			// reload WAL file
			if err := cs.loadWalFile(); err != nil {
				return err
			}
		}
	}

	if err := cs.evsw.Start(); err != nil {
		return err
	}

	// we need the timeoutRoutine for replay so
	// we don't block on the tick chan.
	// NOTE: we will get a build up of garbage go routines
	// firing on the tockChan until the receiveRoutine is started
	// to deal with them (by that point, at most one will be valid)
	if err := cs.timeoutTicker.Start(); err != nil {
		return err
	}

	// Double Signing Risk Reduction
	if err := cs.checkDoubleSigningRisk(cs.Height); err != nil {
		return err
	}

	// now start the receiveRoutine
	go cs.receiveRoutine(0)

	// schedule the first round!
	// use GetRoundState so we don't race the receiveRoutine for access
	cs.scheduleRound0(cs.GetRoundState())

	return nil
}

// loadWalFile loads WAL data from file. It overwrites cs.wal.
// The path comes from cs.config.WalFile(); an open failure is logged and
// returned, leaving cs.wal unchanged.
func (cs *State) loadWalFile() error {
	wal, err := cs.OpenWAL(cs.config.WalFile())
	if err != nil {
		cs.Logger.Error("Error loading State wal", "err", err)
		return err
	}
	cs.wal = wal
	return nil
}

// OnStop implements service.Service.
// It stops the event switch and the timeout ticker; failures are only logged.
func (cs *State) OnStop() {
	if err := cs.evsw.Stop(); err != nil {
		cs.Logger.Error("error trying to stop eventSwitch", "error", err)
	}
	if err := cs.timeoutTicker.Stop(); err != nil {
		cs.Logger.Error("error trying to stop timeoutTicket", "error", err)
	}
	// WAL is stopped in receiveRoutine.
}

// Wait waits for the main routine to return.
690 // NOTE: be sure to Stop() the event switch and drain 691 // any event channels or this may deadlock 692 func (cs *State) Wait() { 693 <-cs.done 694 } 695 696 // OpenWAL opens a file to log all consensus messages and timeouts for 697 // deterministic accountability. 698 func (cs *State) OpenWAL(walFile string) (tmcon.WAL, error) { 699 wal, err := NewWAL(walFile) 700 if err != nil { 701 cs.Logger.Error("Failed to open WAL", "file", walFile, "err", err) 702 return nil, err 703 } 704 wal.SetLogger(cs.Logger.With("wal", walFile)) 705 if err := wal.Start(); err != nil { 706 cs.Logger.Error("Failed to start WAL", "err", err) 707 return nil, err 708 } 709 return wal, nil 710 } 711 712 //------------------------------------------------------------ 713 // Public interface for passing messages into the consensus state, possibly causing a state transition. 714 // If peerID == "", the msg is considered internal. 715 // Messages are added to the appropriate queue (peer or internal). 716 // If the queue is full, the function may block. 717 // TODO: should these return anything or let callers just use events? 718 719 // AddVote inputs a vote. 720 func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { 721 if peerID == "" { 722 cs.internalMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, ""} 723 } else { 724 cs.peerMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, peerID} 725 } 726 727 // TODO: wait for event?! 728 return false, nil 729 } 730 731 // SetProposal inputs a proposal. 732 func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { 733 if peerID == "" { 734 cs.internalMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""} 735 } else { 736 cs.peerMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, peerID} 737 } 738 739 // TODO: wait for event?! 740 return nil 741 } 742 743 // AddProposalBlockPart inputs a part of the proposal block. 
744 func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { 745 if peerID == "" { 746 cs.internalMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, ""} 747 } else { 748 cs.peerMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, peerID} 749 } 750 751 // TODO: wait for event?! 752 return nil 753 } 754 755 // SetProposalAndBlock inputs the proposal and all block parts. 756 func (cs *State) SetProposalAndBlock( 757 proposal *types.Proposal, 758 block *types.Block, 759 parts *types.PartSet, 760 peerID p2p.ID, 761 ) error { 762 if err := cs.SetProposal(proposal, peerID); err != nil { 763 return err 764 } 765 for i := 0; i < int(parts.Total()); i++ { 766 part := parts.GetPart(i) 767 if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { 768 return err 769 } 770 } 771 return nil 772 } 773 774 //------------------------------------------------------------ 775 // internal functions for managing the state 776 777 func (cs *State) updateHeight(height int64) { 778 cs.metrics.Height.Set(float64(height)) 779 cs.Height = height 780 } 781 782 func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { 783 cs.Round = round 784 cs.Step = step 785 } 786 787 // enterNewRound(height, 0) at cs.StartTime. 
788 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 789 // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) 790 sleepDuration := rs.StartTime.Sub(tmtime.Now()) 791 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 792 } 793 794 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 795 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { 796 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) 797 } 798 799 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 800 func (cs *State) sendInternalMessage(mi msgInfo) { 801 select { 802 case cs.internalMsgQueue <- mi: 803 default: 804 // NOTE: using the go-routine means our votes can 805 // be processed out of order. 806 // TODO: use CList here for strict determinism and 807 // attempt push to internalMsgQueue in receiveRoutine 808 cs.Logger.Info("Internal msg queue is full. Using a go-routine") 809 go func() { cs.internalMsgQueue <- mi }() 810 } 811 } 812 813 // Reconstruct LastCommit from SeenCommit, which we saved along with the block, 814 // (which happens even before saving the state) 815 func (cs *State) reconstructLastCommit(state sm.State) { 816 seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) 817 if seenCommit == nil { 818 panic(fmt.Sprintf("Failed to reconstruct LastCommit: seen commit for height %v not found", 819 state.LastBlockHeight)) 820 } 821 822 lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastValidators) 823 if !lastPrecommits.HasTwoThirdsMajority() { 824 panic("Failed to reconstruct LastCommit: Does not have +2/3 maj") 825 } 826 827 cs.LastCommit = lastPrecommits 828 } 829 830 // Updates State and increments height to match that of state. 831 // The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. 
//
// It panics if the round state and the supplied state disagree about heights,
// and returns early (after re-broadcasting the current step) when state is not
// ahead of the state already held.
func (cs *State) updateToState(state sm.State) {
	// After a commit, the height we just worked on must be the state's last block.
	if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight {
		panic(fmt.Sprintf("updateToState() expected state height of %v but found %v",
			cs.Height, state.LastBlockHeight))
	}
	if !cs.state.IsEmpty() {
		if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height {
			// This might happen when someone else is mutating cs.state.
			// Someone forgot to pass in state.Copy() somewhere?!
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v",
				cs.state.LastBlockHeight+1, cs.Height))
		}
		if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight {
			panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v",
				cs.state.LastBlockHeight, cs.state.InitialHeight))
		}

		// If state isn't further out than cs.state, just ignore.
		// This happens when SwitchToConsensus() is called in the reactor.
		// We don't want to reset e.g. the Votes, but we still want to
		// signal the new round step, because other services (eg. txNotifier)
		// depend on having an up-to-date peer state!
		if state.LastBlockHeight <= cs.state.LastBlockHeight {
			cs.Logger.Info(
				"Ignoring updateToState()",
				"newHeight",
				state.LastBlockHeight+1,
				"oldHeight",
				cs.state.LastBlockHeight+1)
			cs.newStep()
			return
		}
	}

	// Reset fields based on state.
	validators := state.Validators

	switch {
	case state.LastBlockHeight == 0: // Very first commit should be empty.
		cs.LastCommit = (*types.VoteSet)(nil)
	case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes
		if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() {
			panic(fmt.Sprintf("Wanted to form a Commit, but Precommits (H/R: %d/%d) didn't have 2/3+: %v",
				state.LastBlockHeight,
				cs.CommitRound,
				cs.Votes.Precommits(cs.CommitRound)))
		}
		cs.LastCommit = cs.Votes.Precommits(cs.CommitRound)
	case cs.LastCommit == nil:
		// NOTE: when Tendermint starts, it has no votes. reconstructLastCommit
		// must be called to reconstruct LastCommit from SeenCommit.
		panic(fmt.Sprintf("LastCommit cannot be empty after initial block (H:%d)",
			state.LastBlockHeight+1,
		))
	}

	// Next desired block height
	height := state.LastBlockHeight + 1
	if height == 1 {
		// no block yet: start at the configured initial height
		height = state.InitialHeight
	}

	// RoundState fields
	cs.updateHeight(height)
	cs.updateRoundStep(0, cstypes.RoundStepNewHeight)
	if cs.CommitTime.IsZero() {
		// "Now" makes it easier to sync up dev nodes.
		// We add timeoutCommit to allow transactions
		// to be gathered for the first block.
		// An alternative solution that relies on clocks:
		// cs.StartTime = state.LastBlockTime.Add(timeoutCommit)
		cs.StartTime = cs.config.Commit(tmtime.Now())
	} else {
		cs.StartTime = cs.config.Commit(cs.CommitTime)
	}

	cs.Validators = validators
	cs.Proposal = nil
	cs.ProposalBlock = nil
	cs.ProposalBlockParts = nil
	cs.LockedRound = -1
	cs.LockedBlock = nil
	cs.LockedBlockParts = nil
	cs.ValidRound = -1
	cs.ValidBlock = nil
	cs.ValidBlockParts = nil
	cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, validators)
	cs.CommitRound = -1
	cs.LastValidators = state.LastValidators
	cs.TriggeredTimeoutPrecommit = false

	cs.state = state

	// Finally, broadcast RoundState
	cs.newStep()
}

// newStep records the current round-state event in the WAL, counts the step
// (nSteps), and, once an event bus has been set, publishes the new round step
// on the event bus and the event switch. WAL and publish errors are only logged.
func (cs *State) newStep() {
	rs := cs.RoundStateEvent()
	if err := cs.wal.Write(rs); err != nil {
		cs.Logger.Error("Error writing to wal", "err", err)
	}
	cs.nSteps++
	// newStep is called by updateToState in NewState before the eventBus is set!
	if cs.eventBus != nil {
		if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil {
			cs.Logger.Error("Error publishing new round step", "err", err)
		}
		cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState)
	}
}

//-----------------------------------------
// the main go routines

// receiveRoutine handles messages which may cause state transitions.
// its argument (maxSteps) is the number of steps to process before exiting - use 0 to run forever
// It keeps the RoundState and is the only thing that updates it.
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
// State must be locked before any internal state is updated.
func (cs *State) receiveRoutine(maxSteps int) {
	// onExit stops the WAL, waits for it, and closes cs.done so that Wait()
	// returns. It runs on Quit and after a recovered panic.
	onExit := func(cs *State) {
		// NOTE: the internalMsgQueue may have signed messages from our
		// priv_val that haven't hit the WAL, but its ok because
		// priv_val tracks LastSig

		// close wal now that we're done writing to it
		if err := cs.wal.Stop(); err != nil {
			cs.Logger.Error("error trying to stop wal", "error", err)
		}
		cs.wal.Wait()

		close(cs.done)
	}

	defer func() {
		if r := recover(); r != nil {
			cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
			// stop gracefully
			//
			// NOTE: We most probably shouldn't be running any further when there is
			// some unexpected panic. Some unknown error happened, and so we don't
			// know if that will result in the validator signing an invalid thing. It
			// might be worthwhile to explore a mechanism for manual resuming via
			// some console or secure RPC system, but for now, halting the chain upon
			// unexpected consensus bugs sounds like the better option.
			onExit(cs)
		}
	}()

	for {
		if maxSteps > 0 {
			if cs.nSteps >= maxSteps {
				cs.Logger.Info("reached max steps. exiting receive routine")
				cs.nSteps = 0
				return
			}
		}
		// Snapshot of the round state taken before blocking in select; it is
		// what handleTimeout compares an incoming timeout against.
		rs := cs.RoundState
		var mi msgInfo

		select {
		case <-cs.txNotifier.TxsAvailable():
			cs.handleTxsAvailable()
		case mi = <-cs.peerMsgQueue:
			if err := cs.wal.Write(mi); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}
			// handles proposals, block parts, votes
			// may generate internal events (votes, complete proposals, 2/3 majorities)
			cs.handleMsg(mi)
		case mi = <-cs.internalMsgQueue:
			// Our own messages must be durable before they are acted on, so
			// a failed synchronous write is fatal.
			err := cs.wal.WriteSync(mi) // NOTE: fsync
			if err != nil {
				panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err))
			}

			if _, ok := mi.Msg.(*tmcon.VoteMessage); ok {
				// we actually want to simulate failing during
				// the previous WriteSync, but this isn't easy to do.
				// Equivalent would be to fail here and manually remove
				// some bytes from the end of the wal.
				fail.Fail() // XXX
			}

			// handles proposals, block parts, votes
			cs.handleMsg(mi)
		case ti := <-cs.timeoutTicker.Chan(): // tockChan:
			if err := cs.wal.Write(ti); err != nil {
				cs.Logger.Error("Error writing to wal", "err", err)
			}
			// if the timeout is relevant to the rs
			// go to the next step
			cs.handleTimeout(ti, rs)
		case <-cs.Quit():
			onExit(cs)
			return
		}
	}
}

// handleTimeout reacts to a fired timeout. Timeouts for another height, an
// older round, or an earlier step of the same round (judged against the rs
// snapshot) are dropped. Otherwise the step named by the timeout decides which
// transition runs, under cs.mtx. It panics on a step it has no case for.
func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) {
	cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step)

	// timeouts must be for current height, round, step
	if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) {
		cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step)
		return
	}

	// the timeout will now cause a state transition
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	switch ti.Step {
	case cstypes.RoundStepNewHeight:
		// NewRound event fired from enterNewRound.
		// XXX: should we fire timeout here (for timeout commit)?
		cs.enterNewRound(ti.Height, 0)
	case cstypes.RoundStepNewRound:
		cs.enterPropose(ti.Height, 0)
	case cstypes.RoundStepPropose:
		if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("Error publishing timeout propose", "err", err)
		}
		cs.enterPrevote(ti.Height, ti.Round)
	case cstypes.RoundStepPrevoteWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("Error publishing timeout wait", "err", err)
		}
		cs.enterPrecommit(ti.Height, ti.Round)
	case cstypes.RoundStepPrecommitWait:
		if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil {
			cs.Logger.Error("Error publishing timeout wait", "err", err)
		}
		// precommit for this round (a no-op if already done), then move on
		cs.enterPrecommit(ti.Height, ti.Round)
		cs.enterNewRound(ti.Height, ti.Round+1)
	default:
		panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step))
	}
}

func (cs *State) handleTxsAvailable() {
	cs.mtx.Lock()
	defer cs.mtx.Unlock()

	// We only need to do this for round 0.
1079 if cs.Round != 0 { 1080 return 1081 } 1082 1083 switch cs.Step { 1084 case cstypes.RoundStepNewHeight: // timeoutCommit phase 1085 if cs.needProofBlock(cs.Height) { 1086 // enterPropose will be called by enterNewRound 1087 return 1088 } 1089 1090 // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight 1091 timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond 1092 cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) 1093 case cstypes.RoundStepNewRound: // after timeoutCommit 1094 cs.enterPropose(cs.Height, 0) 1095 } 1096 } 1097 1098 //----------------------------------------------------------------------------- 1099 // State functions 1100 // Used internally by handleTimeout and handleMsg to make state transitions 1101 1102 // Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit), 1103 // 1104 // or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1) 1105 // 1106 // Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1) 1107 // Enter: +2/3 precommits for nil at (height,round-1) 1108 // Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round) 1109 // NOTE: cs.StartTime was already set for height. 1110 func (cs *State) enterNewRound(height int64, round int32) { 1111 logger := cs.Logger.With("height", height, "round", round) 1112 1113 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { 1114 logger.Debug("enter new round", "msg", log.NewLazySprintf( 1115 "enterNewRound(%v/%v): Invalid args. 
Current step: %v/%v/%v", 1116 height, 1117 round, 1118 cs.Height, 1119 cs.Round, 1120 cs.Step)) 1121 return 1122 } 1123 1124 if now := tmtime.Now(); cs.StartTime.After(now) { 1125 logger.Debug("need to set a buffer and log message here for sanity", "startTime", cs.StartTime, "now", now) 1126 } 1127 1128 logger.Info("enter new round", 1129 "msg", 1130 log.NewLazySprintf("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) 1131 1132 // Increment validators if necessary 1133 validators := cs.Validators 1134 if cs.Round < round { 1135 validators = validators.Copy() 1136 validators.IncrementProposerPriority(tmmath.SafeSubInt32(round, cs.Round)) 1137 } 1138 1139 // Setup new round 1140 // we don't fire newStep for this step, 1141 // but we fire an event, so update the round step first 1142 cs.updateRoundStep(round, cstypes.RoundStepNewRound) 1143 cs.Validators = validators 1144 if round == 0 { 1145 // We've already reset these upon new height, 1146 // and meanwhile we might have received a proposal 1147 // for round 0. 1148 } else { 1149 logger.Info("Resetting Proposal info") 1150 cs.Proposal = nil 1151 cs.ProposalBlock = nil 1152 cs.ProposalBlockParts = nil 1153 } 1154 cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping 1155 cs.TriggeredTimeoutPrecommit = false 1156 1157 if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { 1158 cs.Logger.Error("Error publishing new round", "err", err) 1159 } 1160 cs.metrics.Rounds.Set(float64(round)) 1161 1162 // Wait for txs to be available in the mempool 1163 // before we enterPropose in round 0. If the last block changed the app hash, 1164 // we may need an empty "proof" block, and enterPropose immediately. 
1165 waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) 1166 if waitForTxs { 1167 if cs.config.CreateEmptyBlocksInterval > 0 { 1168 cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, 1169 cstypes.RoundStepNewRound) 1170 } 1171 } else { 1172 cs.enterPropose(height, round) 1173 } 1174 } 1175 1176 // needProofBlock returns true on the first height (so the genesis app hash is signed right away) 1177 // and where the last block (height-1) caused the app hash to change 1178 func (cs *State) needProofBlock(height int64) bool { 1179 if height == cs.state.InitialHeight { 1180 return true 1181 } 1182 1183 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1184 if lastBlockMeta == nil { 1185 panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1)) 1186 } 1187 return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) 1188 } 1189 1190 func (cs *State) isProposer(address []byte) bool { 1191 return bytes.Equal(cs.Validators.GetProposer().Address, address) 1192 } 1193 1194 func (cs *State) defaultDecideProposal(height int64, round int32) { 1195 var block *types.Block 1196 var blockParts *types.PartSet 1197 1198 // Decide on block 1199 if cs.ValidBlock != nil { 1200 // If there is valid block, choose that. 1201 block, blockParts = cs.ValidBlock, cs.ValidBlockParts 1202 } else { 1203 // Create a new proposal block from state/txs from the mempool. 1204 block, blockParts = cs.createProposalBlock() 1205 if block == nil { 1206 return 1207 } 1208 } 1209 1210 // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, 1211 // and the privValidator will refuse to sign anything. 
1212 if err := cs.wal.FlushAndSync(); err != nil { 1213 cs.Logger.Error("Error flushing to disk") 1214 } 1215 1216 // Make proposal 1217 propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} 1218 proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) 1219 p := proposal.ToProto() 1220 if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { 1221 proposal.Signature = p.Signature 1222 1223 // send proposal and block parts on internal msg queue 1224 cs.sendInternalMessage(msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""}) 1225 for i := 0; i < int(blockParts.Total()); i++ { 1226 part := blockParts.GetPart(i) 1227 cs.sendInternalMessage(msgInfo{&tmcon.BlockPartMessage{Height: cs.Height, Round: cs.Round, Part: part}, ""}) 1228 } 1229 cs.Logger.Info("Signed proposal", "height", height, "round", round, "proposal", proposal) 1230 cs.Logger.Debug("default decide proposal", 1231 "msg", 1232 log.NewLazySprintf("Signed proposal block: %v", block)) 1233 } else if !cs.replayMode { 1234 cs.Logger.Error("enterPropose: Error signing proposal", "height", height, "round", round, "err", err) 1235 } 1236 } 1237 1238 // Returns true if the proposal block is complete && 1239 // (if POLRound was proposed, we have +2/3 prevotes from there). 1240 func (cs *State) isProposalComplete() bool { 1241 if cs.Proposal == nil || cs.ProposalBlock == nil { 1242 return false 1243 } 1244 // we have the proposal. if there's a POLRound, 1245 // make sure we have the prevotes from it too 1246 if cs.Proposal.POLRound < 0 { 1247 return true 1248 } 1249 // if this is false the proposer is lying or we haven't received the POL yet 1250 return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() 1251 } 1252 1253 // Create the next block to propose and return it. Returns nil block upon error. 1254 // 1255 // We really only need to return the parts, but the block is returned for 1256 // convenience so we can log the proposal block. 
1257 // 1258 // NOTE: keep it side-effect free for clarity. 1259 // CONTRACT: cs.privValidator is not nil. 1260 func (cs *State) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { 1261 if cs.privValidator == nil { 1262 panic("entered createProposalBlock with privValidator being nil") 1263 } 1264 1265 var commit *types.Commit 1266 switch { 1267 case cs.Height == cs.state.InitialHeight: 1268 // We're creating a proposal for the first block. 1269 // The commit is empty, but not nil. 1270 commit = types.NewCommit(0, 0, types.BlockID{}, nil) 1271 case cs.LastCommit.HasTwoThirdsMajority(): 1272 // Make the commit from LastCommit 1273 commit = cs.LastCommit.MakeCommit() 1274 default: // This shouldn't happen. 1275 cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block") 1276 return 1277 } 1278 1279 if cs.privValidatorPubKey == nil { 1280 // If this node is a validator & proposer in the current round, it will 1281 // miss the opportunity to create a block. 1282 cs.Logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet)) 1283 return 1284 } 1285 proposerAddr := cs.privValidatorPubKey.Address() 1286 1287 return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) 1288 } 1289 1290 // Enter: any +2/3 prevotes at next round. 1291 func (cs *State) enterPrevoteWait(height int64, round int32) { 1292 logger := cs.Logger.With("height", height, "round", round) 1293 1294 if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { 1295 logger.Debug("enter prevote wait", 1296 "msg", 1297 log.NewLazySprintf( 1298 "enterPrevoteWait(%v/%v): Invalid args. 
Current step: %v/%v/%v", 1299 height, 1300 round, 1301 cs.Height, 1302 cs.Round, 1303 cs.Step)) 1304 return 1305 } 1306 if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { 1307 panic(fmt.Sprintf("enterPrevoteWait(%v/%v), but Prevotes does not have any +2/3 votes", height, round)) 1308 } 1309 1310 logger.Debug("enter prevote wait", 1311 "msg", 1312 log.NewLazySprintf("enterPrevoteWait(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) 1313 1314 defer func() { 1315 // Done enterPrevoteWait: 1316 cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) 1317 cs.newStep() 1318 }() 1319 1320 // Wait for some more prevotes; enterPrecommit 1321 cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) 1322 } 1323 1324 // Enter: any +2/3 precommits for next round. 1325 func (cs *State) enterPrecommitWait(height int64, round int32) { 1326 logger := cs.Logger.With("height", height, "round", round) 1327 1328 if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { 1329 logger.Debug("state enter precommit wait", 1330 "msg", 1331 log.NewLazySprintf( 1332 "enterPrecommitWait(%v/%v): Invalid args. "+ 1333 "Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v", 1334 height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit)) 1335 return 1336 } 1337 if !cs.Votes.Precommits(round).HasTwoThirdsAny() { 1338 panic(fmt.Sprintf("enterPrecommitWait(%v/%v), but Precommits does not have any +2/3 votes", height, round)) 1339 } 1340 logger.Info("enter precommit wait", 1341 "msg", 1342 log.NewLazySprintf("enterPrecommitWait(%v/%v). 
Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) 1343 1344 defer func() { 1345 // Done enterPrecommitWait: 1346 cs.TriggeredTimeoutPrecommit = true 1347 cs.newStep() 1348 }() 1349 1350 // Wait for some more precommits; enterNewRound 1351 cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) 1352 } 1353 1354 // Enter: +2/3 precommits for block 1355 func (cs *State) enterCommit(height int64, commitRound int32) { 1356 logger := cs.Logger.With("height", height, "commitRound", commitRound) 1357 1358 if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { 1359 logger.Debug("enter commit", 1360 "msg", 1361 log.NewLazySprintf("enterCommit(%v/%v): Invalid args. Current step: %v/%v/%v", 1362 height, 1363 commitRound, 1364 cs.Height, 1365 cs.Round, 1366 cs.Step)) 1367 return 1368 } 1369 logger.Info("enter commit", 1370 "msg", 1371 log.NewLazySprintf("enterCommit(%v/%v). Current: %v/%v/%v", height, commitRound, cs.Height, cs.Round, cs.Step)) 1372 1373 defer func() { 1374 // Done enterCommit: 1375 // keep cs.Round the same, commitRound points to the right Precommits set. 1376 cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) 1377 cs.CommitRound = commitRound 1378 cs.CommitTime = tmtime.Now() 1379 cs.newStep() 1380 1381 // Maybe finalize immediately. 1382 cs.tryFinalizeCommit(height) 1383 }() 1384 1385 blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() 1386 if !ok { 1387 panic("RunActionCommit() expects +2/3 precommits") 1388 } 1389 1390 // The Locked* fields no longer matter. 1391 // Move them over to ProposalBlock if they match the commit hash, 1392 // otherwise they'll be cleared in updateToState. 1393 if cs.LockedBlock.HashesTo(blockID.Hash) { 1394 logger.Info("Commit is for locked block. 
Set ProposalBlock=LockedBlock", "blockHash", blockID.Hash) 1395 cs.ProposalBlock = cs.LockedBlock 1396 cs.ProposalBlockParts = cs.LockedBlockParts 1397 } 1398 1399 // If we don't have the block being committed, set up to get it. 1400 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1401 if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { 1402 logger.Info( 1403 "commit is for a block we do not know about; set ProposalBlock=nil", 1404 "proposal", log.NewLazyBlockHash(cs.ProposalBlock), 1405 "commit", blockID.Hash, 1406 ) 1407 1408 // We're getting the wrong block. 1409 // Set up ProposalBlockParts and keep waiting. 1410 cs.ProposalBlock = nil 1411 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) 1412 if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { 1413 cs.Logger.Error("Error publishing valid block", "err", err) 1414 } 1415 cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) 1416 } 1417 // else { 1418 // We just need to keep waiting. 1419 // } 1420 } 1421 } 1422 1423 // If we have the block AND +2/3 commits for it, finalize. 1424 func (cs *State) tryFinalizeCommit(height int64) { 1425 logger := cs.Logger.With("height", height) 1426 1427 if cs.Height != height { 1428 panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) 1429 } 1430 1431 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1432 if !ok || len(blockID.Hash) == 0 { 1433 logger.Error("Attempt to finalize failed. There was no +2/3 majority, or +2/3 was for <nil>.") 1434 return 1435 } 1436 if !cs.ProposalBlock.HashesTo(blockID.Hash) { 1437 // TODO: this happens every time if we're not a validator (ugly logs) 1438 // TODO: ^^ wait, why does it matter that we're a validator? 
1439 logger.Debug( 1440 "attempt to finalize failed; we do not have the commit block", 1441 "proposal-block", log.NewLazyBlockHash(cs.ProposalBlock), 1442 "commit-block", blockID.Hash, 1443 ) 1444 return 1445 } 1446 1447 // go 1448 cs.finalizeCommit(height) 1449 } 1450 1451 // Increment height and goto cstypes.RoundStepNewHeight 1452 func (cs *State) finalizeCommit(height int64) { 1453 if cs.Height != height || cs.Step != cstypes.RoundStepCommit { 1454 cs.Logger.Debug("finalize commit", 1455 "msg", 1456 log.NewLazySprintf("finalizeCommit(%v): Invalid args. Current step: %v/%v/%v", 1457 height, 1458 cs.Height, 1459 cs.Round, 1460 cs.Step)) 1461 return 1462 } 1463 1464 blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() 1465 block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts 1466 1467 if !ok { 1468 panic("Cannot finalizeCommit, commit does not have two thirds majority") 1469 } 1470 if !blockParts.HasHeader(blockID.PartSetHeader) { 1471 panic("Expected ProposalBlockParts header to be commit header") 1472 } 1473 if !block.HashesTo(blockID.Hash) { 1474 panic("Cannot finalizeCommit, ProposalBlock does not hash to commit hash") 1475 } 1476 if err := cs.blockExec.ValidateBlock(cs.state, block); err != nil { 1477 panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) 1478 } 1479 1480 cs.Logger.Info("finalizing commit of block with N txs", 1481 "height", block.Height, 1482 "hash", block.Hash(), 1483 "root", block.AppHash, 1484 "N", len(block.Txs), 1485 ) 1486 cs.Logger.Debug("finalize commit", "msg", log.NewLazySprintf("%v", block)) 1487 1488 fail.Fail() // XXX 1489 1490 // Save to blockStore. 
1491 if cs.blockStore.Height() < block.Height { 1492 // NOTE: the seenCommit is local justification to commit this block, 1493 // but may differ from the LastCommit included in the next block 1494 precommits := cs.Votes.Precommits(cs.CommitRound) 1495 seenCommit := precommits.MakeCommit() 1496 cs.blockStore.SaveBlock(block, blockParts, seenCommit) 1497 } else { 1498 // Happens during replay if we already saved the block but didn't commit 1499 cs.Logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) 1500 } 1501 1502 fail.Fail() // XXX 1503 1504 // Write EndHeightMessage{} for this height, implying that the blockstore 1505 // has saved the block. 1506 // 1507 // If we crash before writing this EndHeightMessage{}, we will recover by 1508 // running ApplyBlock during the ABCI handshake when we restart. If we 1509 // didn't save the block to the blockstore before writing 1510 // EndHeightMessage{}, we'd have to change WAL replay -- currently it 1511 // complains about replaying for heights where an #ENDHEIGHT entry already 1512 // exists. 1513 // 1514 // Either way, the State should not be resumed until we 1515 // successfully call ApplyBlock (ie. later here, or in Handshake after 1516 // restart). 1517 endMsg := tmcon.EndHeightMessage{Height: height} 1518 if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync 1519 panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", 1520 endMsg, err)) 1521 } 1522 1523 fail.Fail() // XXX 1524 1525 // Create a copy of the state for staging and an event cache for txs. 1526 stateCopy := cs.state.Copy() 1527 1528 // Execute and commit the block, update and save the state, and update the mempool. 1529 // NOTE The block.AppHash wont reflect these txs until the next block. 
1530 var err error 1531 var retainHeight int64 1532 stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( 1533 stateCopy, 1534 types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()}, 1535 block) 1536 if err != nil { 1537 cs.Logger.Error("Error on ApplyBlock", "err", err) 1538 return 1539 } 1540 1541 fail.Fail() // XXX 1542 1543 // Prune old heights, if requested by ABCI app. 1544 if retainHeight > 0 { 1545 pruned, err := cs.pruneBlocks(retainHeight) 1546 if err != nil { 1547 cs.Logger.Error("Failed to prune blocks", "retainHeight", retainHeight, "err", err) 1548 } else { 1549 cs.Logger.Info("Pruned blocks", "pruned", pruned, "retainHeight", retainHeight) 1550 } 1551 } 1552 1553 // must be called before we update state 1554 cs.recordMetrics(height, block) 1555 1556 // NewHeightStep! 1557 cs.updateToState(stateCopy) 1558 1559 fail.Fail() // XXX 1560 1561 // Private validator might have changed it's key pair => refetch pubkey. 1562 if err := cs.updatePrivValidatorPubKey(); err != nil { 1563 cs.Logger.Error("Can't get private validator pubkey", "err", err) 1564 } 1565 1566 // cs.StartTime is already set. 1567 // Schedule Round0 to start soon. 1568 cs.scheduleRound0(&cs.RoundState) 1569 1570 // By here, 1571 // * cs.Height has been increment to height+1 1572 // * cs.Step is now cstypes.RoundStepNewHeight 1573 // * cs.StartTime is set to when we will start round0. 
1574 } 1575 1576 func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { 1577 base := cs.blockStore.Base() 1578 if retainHeight <= base { 1579 return 0, nil 1580 } 1581 pruned, err := cs.blockStore.PruneBlocks(retainHeight) 1582 if err != nil { 1583 return 0, fmt.Errorf("failed to prune block store: %w", err) 1584 } 1585 err = cs.blockExec.Store().PruneStates(base, retainHeight) 1586 if err != nil { 1587 return 0, fmt.Errorf("failed to prune state database: %w", err) 1588 } 1589 return pruned, nil 1590 } 1591 1592 func (cs *State) recordMetrics(height int64, block *types.Block) { 1593 cs.metrics.Validators.Set(float64(cs.Validators.Size())) 1594 cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) 1595 1596 var ( 1597 missingValidators int 1598 missingValidatorsPower int64 1599 ) 1600 // height=0 -> MissingValidators and MissingValidatorsPower are both 0. 1601 // Remember that the first LastCommit is intentionally empty, so it's not 1602 // fair to increment missing validators number. 1603 if height > cs.state.InitialHeight { 1604 // Sanity check that commit size matches validator set size - only applies 1605 // after first block. 1606 var ( 1607 commitSize = block.LastCommit.Size() 1608 valSetLen = len(cs.LastValidators.Validators) 1609 address types.Address 1610 ) 1611 if commitSize != valSetLen { 1612 panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", 1613 commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastValidators.Validators)) 1614 } 1615 1616 if cs.privValidator != nil { 1617 if cs.privValidatorPubKey == nil { 1618 // Metrics won't be updated, but it's not critical. 
1619 cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) 1620 } else { 1621 address = cs.privValidatorPubKey.Address() 1622 } 1623 } 1624 1625 for i, val := range cs.LastValidators.Validators { 1626 commitSig := block.LastCommit.Signatures[i] 1627 if commitSig.Absent() { 1628 missingValidators++ 1629 missingValidatorsPower += val.VotingPower 1630 } 1631 1632 if bytes.Equal(val.Address, address) { 1633 label := []string{ 1634 "validator_address", val.Address.String(), 1635 } 1636 cs.metrics.ValidatorPower.With(label...).Set(float64(val.VotingPower)) 1637 if commitSig.ForBlock() { 1638 cs.metrics.ValidatorLastSignedHeight.With(label...).Set(float64(height)) 1639 } else { 1640 cs.metrics.ValidatorMissedBlocks.With(label...).Add(float64(1)) 1641 } 1642 } 1643 1644 } 1645 } 1646 cs.metrics.MissingValidators.Set(float64(missingValidators)) 1647 cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) 1648 1649 // NOTE: byzantine validators power and count is only for consensus evidence i.e. 
duplicate vote 1650 var ( 1651 byzantineValidatorsPower = int64(0) 1652 byzantineValidatorsCount = int64(0) 1653 ) 1654 for _, ev := range block.Evidence.Evidence { 1655 if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { 1656 if _, val := cs.Validators.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { 1657 byzantineValidatorsCount++ 1658 byzantineValidatorsPower += val.VotingPower 1659 } 1660 } 1661 } 1662 cs.metrics.ByzantineValidators.Set(float64(byzantineValidatorsCount)) 1663 cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) 1664 1665 if height > 1 { 1666 lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) 1667 if lastBlockMeta != nil { 1668 cs.metrics.BlockIntervalSeconds.Observe( 1669 block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), 1670 ) 1671 } 1672 } 1673 1674 cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) 1675 cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) 1676 cs.metrics.BlockSizeBytes.Set(float64(block.Size())) 1677 cs.metrics.CommittedHeight.Set(float64(block.Height)) 1678 } 1679 1680 //----------------------------------------------------------------------------- 1681 1682 // NOTE: block is not necessarily valid. 1683 // Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, 1684 // once we have the full block. 1685 func (cs *State) addProposalBlockPart(msg *tmcon.BlockPartMessage, peerID p2p.ID) (added bool, err error) { 1686 height, round, part := msg.Height, msg.Round, msg.Part 1687 1688 // Blocks might be reused, so round mismatch is OK 1689 if cs.Height != height { 1690 cs.Logger.Debug("Received block part from wrong height", "height", height, "round", round) 1691 return false, nil 1692 } 1693 1694 // We're not expecting a block part. 1695 if cs.ProposalBlockParts == nil { 1696 // NOTE: this can happen when we've gone to a higher round and 1697 // then receive parts from the previous round - not necessarily a bad peer. 
1698 cs.Logger.Info("Received a block part when we're not expecting any", 1699 "height", height, "round", round, "index", part.Index, "peer", peerID) 1700 return false, nil 1701 } 1702 1703 added, err = cs.ProposalBlockParts.AddPart(part) 1704 if err != nil { 1705 return added, err 1706 } 1707 if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { 1708 return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", 1709 cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, 1710 ) 1711 } 1712 if added && cs.ProposalBlockParts.IsComplete() { 1713 bz, err := io.ReadAll(cs.ProposalBlockParts.GetReader()) 1714 if err != nil { 1715 return added, err 1716 } 1717 1718 pbb := new(tmproto.Block) 1719 err = proto.Unmarshal(bz, pbb) 1720 if err != nil { 1721 return added, err 1722 } 1723 1724 block, err := types.BlockFromProto(pbb) 1725 if err != nil { 1726 return added, err 1727 } 1728 1729 cs.ProposalBlock = block 1730 // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal 1731 cs.Logger.Info("Received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) 1732 if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { 1733 cs.Logger.Error("Error publishing event complete proposal", "err", err) 1734 } 1735 1736 // Update Valid* if we can. 
1737 prevotes := cs.Votes.Prevotes(cs.Round) 1738 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 1739 if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { 1740 if cs.ProposalBlock.HashesTo(blockID.Hash) { 1741 cs.Logger.Info("Updating valid block to new proposal block", 1742 "valid-round", cs.Round, "valid-block-hash", cs.ProposalBlock.Hash()) 1743 cs.ValidRound = cs.Round 1744 cs.ValidBlock = cs.ProposalBlock 1745 cs.ValidBlockParts = cs.ProposalBlockParts 1746 } 1747 // TODO: In case there is +2/3 majority in Prevotes set for some 1748 // block and cs.ProposalBlock contains different block, either 1749 // proposer is faulty or voting power of faulty processes is more 1750 // than 1/3. We should trigger in the future accountability 1751 // procedure at this point. 1752 } 1753 1754 if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { 1755 // Move onto the next step 1756 cs.enterPrevote(height, cs.Round) 1757 if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added 1758 cs.enterPrecommit(height, cs.Round) 1759 } 1760 } else if cs.Step == cstypes.RoundStepCommit { 1761 // If we're waiting on the proposal block... 1762 cs.tryFinalizeCommit(height) 1763 } 1764 return added, nil 1765 } 1766 return added, nil 1767 } 1768 1769 // Attempt to add the vote. if its a duplicate signature, dupeout the validator 1770 func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { 1771 added, err := cs.addVote(vote, peerID) 1772 if err != nil { 1773 // If the vote height is off, we'll just ignore it, 1774 // But if it's a conflicting sig, add it to the cs.evpool. 1775 // If it's otherwise invalid, punish peer. 
1776 //nolint: gocritic 1777 if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { 1778 if cs.privValidatorPubKey == nil { 1779 return false, errPubKeyIsNotSet 1780 } 1781 1782 if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { 1783 cs.Logger.Error( 1784 "Found conflicting vote from ourselves. Did you unsafe_reset a validator?", 1785 "height", 1786 vote.Height, 1787 "round", 1788 vote.Round, 1789 "type", 1790 vote.Type) 1791 return added, err 1792 } 1793 cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) 1794 return added, err 1795 } else if err == types.ErrVoteNonDeterministicSignature { 1796 cs.Logger.Debug("Vote has non-deterministic signature", "err", err) 1797 } else { 1798 // Either 1799 // 1) bad peer OR 1800 // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR 1801 // 3) tmkms use with multiple validators connecting to a single tmkms instance 1802 // (https://github.com/vipernet-xyz/tm/issues/3839). 1803 cs.Logger.Info("Error attempting to add vote", "err", err) 1804 return added, ErrAddingVote 1805 } 1806 } 1807 return added, nil 1808 } 1809 1810 //----------------------------------------------------------------------------- 1811 1812 // CONTRACT: cs.privValidator is not nil. 1813 func (cs *State) signVote( 1814 msgType tmproto.SignedMsgType, 1815 hash []byte, 1816 header types.PartSetHeader, 1817 ) (*types.Vote, error) { 1818 // Flush the WAL. Otherwise, we may not recompute the same vote to sign, 1819 // and the privValidator will refuse to sign anything. 
1820 if err := cs.wal.FlushAndSync(); err != nil { 1821 return nil, err 1822 } 1823 1824 if cs.privValidatorPubKey == nil { 1825 return nil, errPubKeyIsNotSet 1826 } 1827 addr := cs.privValidatorPubKey.Address() 1828 valIdx, _ := cs.Validators.GetByAddress(addr) 1829 1830 vote := &types.Vote{ 1831 ValidatorAddress: addr, 1832 ValidatorIndex: valIdx, 1833 Height: cs.Height, 1834 Round: cs.Round, 1835 Timestamp: cs.voteTime(), 1836 Type: msgType, 1837 BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, 1838 } 1839 v := vote.ToProto() 1840 err := cs.privValidator.SignVote(cs.state.ChainID, v) 1841 vote.Signature = v.Signature 1842 1843 return vote, err 1844 } 1845 1846 func (cs *State) voteTime() time.Time { 1847 now := tmtime.Now() 1848 minVoteTime := now 1849 // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, 1850 // even if cs.LockedBlock != nil. See https://github.com/vipernet-xyz/tm/tree/v0.34.x/spec/. 1851 timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond 1852 if cs.LockedBlock != nil { 1853 // See the BFT time spec 1854 // https://github.com/vipernet-xyz/tm/blob/v0.34.x/spec/consensus/bft-time.md 1855 minVoteTime = cs.LockedBlock.Time.Add(timeIota) 1856 } else if cs.ProposalBlock != nil { 1857 minVoteTime = cs.ProposalBlock.Time.Add(timeIota) 1858 } 1859 1860 if now.After(minVoteTime) { 1861 return now 1862 } 1863 return minVoteTime 1864 } 1865 1866 // sign the vote and publish on internalMsgQueue 1867 func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { 1868 if cs.privValidator == nil { // the node does not have a key 1869 return nil 1870 } 1871 1872 if cs.privValidatorPubKey == nil { 1873 // Vote won't be signed, but it's not critical. 1874 cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) 1875 return nil 1876 } 1877 1878 // If the node not in the validator set, do nothing. 
1879 if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { 1880 return nil 1881 } 1882 1883 // TODO: pass pubKey to signVote 1884 vote, err := cs.signVote(msgType, hash, header) 1885 if err == nil { 1886 cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: vote}, ""}) 1887 cs.Logger.Info("Signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) 1888 return vote 1889 } 1890 // if !cs.replayMode { 1891 cs.Logger.Error("Error signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) 1892 //} 1893 return nil 1894 } 1895 1896 // updatePrivValidatorPubKey get's the private validator public key and 1897 // memoizes it. This func returns an error if the private validator is not 1898 // responding or responds with an error. 1899 func (cs *State) updatePrivValidatorPubKey() error { 1900 if cs.privValidator == nil { 1901 return nil 1902 } 1903 1904 pubKey, err := cs.privValidator.GetPubKey() 1905 if err != nil { 1906 return err 1907 } 1908 cs.privValidatorPubKey = pubKey 1909 return nil 1910 } 1911 1912 // look back to check existence of the node's consensus votes before joining consensus 1913 func (cs *State) checkDoubleSigningRisk(height int64) error { 1914 if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { 1915 valAddr := cs.privValidatorPubKey.Address() 1916 doubleSignCheckHeight := cs.config.DoubleSignCheckHeight 1917 if doubleSignCheckHeight > height { 1918 doubleSignCheckHeight = height 1919 } 1920 for i := int64(1); i < doubleSignCheckHeight; i++ { 1921 lastCommit := cs.blockStore.LoadSeenCommit(height - i) 1922 if lastCommit != nil { 1923 for sigIdx, s := range lastCommit.Signatures { 1924 if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { 1925 cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i) 1926 return ErrSignatureFoundInPastBlocks 1927 } 1928 } 1929 } 1930 } 
1931 } 1932 return nil 1933 } 1934 1935 //--------------------------------------------------------- 1936 1937 func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { 1938 if h1 < h2 { 1939 return -1 1940 } else if h1 > h2 { 1941 return 1 1942 } 1943 if r1 < r2 { 1944 return -1 1945 } else if r1 > r2 { 1946 return 1 1947 } 1948 if s1 < s2 { 1949 return -1 1950 } else if s1 > s2 { 1951 return 1 1952 } 1953 return 0 1954 } 1955 1956 // repairWalFile decodes messages from src (until the decoder errors) and 1957 // writes them to dst. 1958 func repairWalFile(src, dst string) error { 1959 in, err := os.Open(src) 1960 if err != nil { 1961 return err 1962 } 1963 defer in.Close() 1964 1965 out, err := os.Open(dst) 1966 if err != nil { 1967 return err 1968 } 1969 defer out.Close() 1970 1971 var ( 1972 dec = NewWALDecoder(in) 1973 enc = NewWALEncoder(out) 1974 ) 1975 1976 // best-case repair (until first error is encountered) 1977 for { 1978 msg, err := dec.Decode() 1979 if err != nil { 1980 break 1981 } 1982 1983 err = enc.Encode(msg) 1984 if err != nil { 1985 return fmt.Errorf("failed to encode msg: %w", err) 1986 } 1987 } 1988 1989 return nil 1990 }