github.com/iotexproject/iotex-core@v1.14.1-rc1/consensus/consensusfsm/fsm.go (about) 1 // Copyright (c) 2019 IoTeX Foundation 2 // This source code is provided 'as is' and no warranties are given as to title or non-infringement, merchantability 3 // or fitness for purpose and, to the extent permitted by law, all liability for your use of the code is disclaimed. 4 // This source code is governed by Apache License 2.0 that can be found in the LICENSE file. 5 6 package consensusfsm 7 8 import ( 9 "context" 10 "sync" 11 "time" 12 13 "github.com/facebookgo/clock" 14 fsm "github.com/iotexproject/go-fsm" 15 "github.com/pkg/errors" 16 "github.com/prometheus/client_golang/prometheus" 17 "go.uber.org/zap" 18 ) 19 20 /** 21 * TODO: For the nodes received correct proposal, add proposer's proposal endorse 22 * without signature, which could be replaced with real signature 23 */ 24 var ( 25 _consensusEvtsMtc = prometheus.NewCounterVec( 26 prometheus.CounterOpts{ 27 Name: "iotex_consensus_events", 28 Help: "IoTeX consensus events", 29 }, 30 []string{"type", "status"}, 31 ) 32 ) 33 34 func init() { 35 prometheus.MustRegister(_consensusEvtsMtc) 36 } 37 38 const ( 39 // consensus states 40 sPrepare fsm.State = "S_PREPARE" 41 sAcceptBlockProposal fsm.State = "S_ACCEPT_BLOCK_PROPOSAL" 42 sAcceptProposalEndorsement fsm.State = "S_ACCEPT_PROPOSAL_ENDORSEMENT" 43 sAcceptLockEndorsement fsm.State = "S_ACCEPT_LOCK_ENDORSEMENT" 44 sAcceptPreCommitEndorsement fsm.State = "S_ACCEPT_PRECOMMIT_ENDORSEMENT" 45 46 // consensus event types 47 eCalibrate fsm.EventType = "E_CALIBRATE" 48 ePrepare fsm.EventType = "E_PREPARE" 49 eReceiveBlock fsm.EventType = "E_RECEIVE_BLOCK" 50 eFailedToReceiveBlock fsm.EventType = "E_FAILED_TO_RECEIVE_BLOCK" 51 eReceiveProposalEndorsement fsm.EventType = "E_RECEIVE_PROPOSAL_ENDORSEMENT" 52 eStopReceivingProposalEndorsement fsm.EventType = "E_STOP_RECEIVING_PROPOSAL_ENDORSEMENT" 53 eReceiveLockEndorsement fsm.EventType = "E_RECEIVE_LOCK_ENDORSEMENT" 54 eStopReceivingLockEndorsement fsm.EventType = "E_STOP_RECEIVING_LOCK_ENDORSEMENT" 55 eReceivePreCommitEndorsement fsm.EventType = "E_RECEIVE_PRECOMMIT_ENDORSEMENT" 56 eStopReceivingPreCommitEndorsement fsm.EventType = "E_STOP_RECEIVING_PRECOMMIT_ENDORSEMENT" 57 eBroadcastPreCommitEndorsement fsm.EventType = "E_BROADCAST_PRECOMMIT_ENDORSEMENT" 58 59 // BackdoorEvent indicates a backdoor event type 60 BackdoorEvent fsm.EventType = "E_BACKDOOR" 61 62 // InitState refers the initial state of the consensus fsm 63 InitState = sPrepare 64 ) 65 66 var ( 67 // ErrEvtCast indicates the error of casting the event 68 ErrEvtCast = errors.New("error when casting the event") 69 // ErrMsgCast indicates the error of casting to endorsed message 70 ErrMsgCast = errors.New("error when casting to endorsed message") 71 // ErrEvtConvert indicates the error of converting the event from/to the proto message 72 ErrEvtConvert = errors.New("error when converting the event from/to the proto message") 73 // ErrEvtType represents an unexpected event type error 74 ErrEvtType = errors.New("error when check the event type") 75 // ErrOldCalibrateEvt indicates the error of ignoring old calibrate event 76 ErrOldCalibrateEvt = errors.New("ignore old calibrate event") 77 78 // consensusStates is a slice consisting of all consensus states 79 consensusStates = []fsm.State{ 80 sPrepare, 81 sAcceptBlockProposal, 82 sAcceptProposalEndorsement, 83 sAcceptLockEndorsement, 84 sAcceptPreCommitEndorsement, 85 } 86 ) 87 88 // ConsensusFSM wraps over the general purpose FSM and implements the consensus logic 89 type ConsensusFSM struct { 90 fsm fsm.FSM 91 evtq chan *ConsensusEvent 92 close chan interface{} 93 clock clock.Clock 94 ctx Context 95 wg sync.WaitGroup 96 } 97 98 // NewConsensusFSM returns a new fsm 99 func NewConsensusFSM(ctx Context, clock clock.Clock) (*ConsensusFSM, error) { 100 cm := &ConsensusFSM{ 101 evtq: make(chan *ConsensusEvent, ctx.EventChanSize()), 102 close: make(chan interface{}), 103 ctx: ctx, 104 clock: clock, 105 } 106 b := fsm.NewBuilder(). 107 AddInitialState(sPrepare). 108 AddStates( 109 sAcceptBlockProposal, 110 sAcceptProposalEndorsement, 111 sAcceptLockEndorsement, 112 sAcceptPreCommitEndorsement, 113 ). 114 AddTransition(sPrepare, ePrepare, cm.prepare, []fsm.State{ 115 sPrepare, 116 sAcceptBlockProposal, 117 sAcceptPreCommitEndorsement, 118 }). 119 AddTransition( 120 sAcceptBlockProposal, 121 eReceiveBlock, 122 cm.onReceiveBlock, 123 []fsm.State{ 124 sAcceptBlockProposal, // proposed block invalid 125 sAcceptProposalEndorsement, // receive valid block, jump to next step 126 }). 127 AddTransition( 128 sAcceptBlockProposal, 129 eFailedToReceiveBlock, 130 cm.onFailedToReceiveBlock, 131 []fsm.State{ 132 sAcceptProposalEndorsement, // no valid block, jump to next step 133 }). 134 AddTransition( 135 sAcceptProposalEndorsement, 136 eReceiveProposalEndorsement, 137 cm.onReceiveProposalEndorsementInAcceptProposalEndorsementState, 138 []fsm.State{ 139 sAcceptProposalEndorsement, // not enough endorsements 140 sAcceptLockEndorsement, // enough endorsements 141 }). 142 AddTransition( 143 sAcceptProposalEndorsement, 144 eReceivePreCommitEndorsement, 145 cm.onReceiveProposalEndorsementInAcceptProposalEndorsementState, 146 []fsm.State{ 147 sAcceptProposalEndorsement, // not enough endorsements 148 sAcceptLockEndorsement, // enough endorsements 149 }). 150 AddTransition( 151 sAcceptProposalEndorsement, 152 eStopReceivingProposalEndorsement, 153 cm.onStopReceivingProposalEndorsement, 154 []fsm.State{ 155 sAcceptLockEndorsement, // timeout, jump to next step 156 }). 157 AddTransition( 158 sAcceptLockEndorsement, 159 eReceiveProposalEndorsement, 160 cm.onReceiveProposalEndorsementInAcceptLockEndorsementState, 161 []fsm.State{ 162 sAcceptLockEndorsement, 163 }, 164 ). 165 AddTransition( 166 sAcceptLockEndorsement, 167 eReceiveLockEndorsement, 168 cm.onReceiveLockEndorsement, 169 []fsm.State{ 170 sAcceptLockEndorsement, // not enough endorsements 171 sAcceptPreCommitEndorsement, // reach commit agreement, jump to next step 172 }). 173 AddTransition( 174 sAcceptLockEndorsement, 175 eReceivePreCommitEndorsement, 176 cm.onReceiveLockEndorsement, 177 []fsm.State{ 178 sAcceptLockEndorsement, // not enough endorsements 179 sAcceptPreCommitEndorsement, // reach commit agreement, jump to next step 180 }). 181 AddTransition( 182 sAcceptLockEndorsement, 183 eStopReceivingLockEndorsement, 184 cm.onStopReceivingLockEndorsement, 185 []fsm.State{ 186 sPrepare, // timeout, jump to next round 187 }). 188 AddTransition( 189 sAcceptPreCommitEndorsement, 190 eBroadcastPreCommitEndorsement, 191 cm.onBroadcastPreCommitEndorsement, 192 []fsm.State{ 193 sAcceptPreCommitEndorsement, 194 }). 195 AddTransition( 196 sAcceptPreCommitEndorsement, 197 eStopReceivingPreCommitEndorsement, 198 cm.onStopReceivingPreCommitEndorsement, 199 []fsm.State{ 200 sPrepare, 201 }). 202 AddTransition( 203 sAcceptPreCommitEndorsement, 204 eReceivePreCommitEndorsement, 205 cm.onReceivePreCommitEndorsement, 206 []fsm.State{ 207 sAcceptPreCommitEndorsement, 208 sPrepare, // reach consensus, start next epoch 209 }) 210 // Add the backdoor transition so that we could unit test the transition from any given state 211 for _, state := range consensusStates { 212 b = b.AddTransition(state, BackdoorEvent, cm.handleBackdoorEvt, consensusStates) 213 if state != sPrepare { 214 b = b.AddTransition(state, eCalibrate, cm.calibrate, []fsm.State{sPrepare, state}) 215 } 216 } 217 m, err := b.Build() 218 if err != nil { 219 return nil, errors.Wrap(err, "error when building the FSM") 220 } 221 cm.fsm = m 222 return cm, nil 223 } 224 225 // Start starts the fsm and get in initial state 226 func (m *ConsensusFSM) Start(c context.Context) error { 227 m.wg.Add(1) 228 go func() { 229 running := true 230 for running { 231 select { 232 case <-m.close: 233 running = false 234 case evt := <-m.evtq: 235 if err := m.handle(evt); err != nil { 236 m.ctx.Logger().Error( 237 "consensus state transition fails", 238 zap.Error(err), 239 ) 240 } 241 } 242 } 243 m.wg.Done() 244 }() 245 return nil 246 } 247 248 // Stop stops the consensus fsm 249 func (m *ConsensusFSM) Stop(_ context.Context) error { 250 close(m.close) 251 m.wg.Wait() 252 return nil 253 } 254 255 // CurrentState returns the current state 256 func (m *ConsensusFSM) CurrentState() fsm.State { 257 return m.fsm.CurrentState() 258 } 259 260 // NumPendingEvents returns the number of pending events 261 func (m *ConsensusFSM) NumPendingEvents() int { 262 return len(m.evtq) 263 } 264 265 // Calibrate calibrates the state if necessary 266 func (m *ConsensusFSM) Calibrate(height uint64) { 267 m.produce(m.ctx.NewConsensusEvent(eCalibrate, height), 0) 268 } 269 270 // BackToPrepare produces an ePrepare event after delay 271 func (m *ConsensusFSM) BackToPrepare(delay time.Duration) (fsm.State, error) { 272 // If the node is not active in consensus, stay at sPrepare and no need to produce ePrepare 273 if m.ctx.Active() { 274 m.produceConsensusEvent(ePrepare, delay) 275 } 276 return sPrepare, nil 277 } 278 279 // ProduceReceiveBlockEvent produces an eReceiveBlock event after delay 280 func (m *ConsensusFSM) ProduceReceiveBlockEvent(block interface{}) { 281 m.produce(m.ctx.NewConsensusEvent(eReceiveBlock, block), 0) 282 } 283 284 // ProduceReceiveProposalEndorsementEvent produces an eReceiveProposalEndorsement event right away 285 func (m *ConsensusFSM) ProduceReceiveProposalEndorsementEvent(vote interface{}) { 286 m.produce(m.ctx.NewConsensusEvent(eReceiveProposalEndorsement, vote), 0) 287 } 288 289 // ProduceReceiveLockEndorsementEvent produces an eReceiveLockEndorsement event right away 290 func (m *ConsensusFSM) ProduceReceiveLockEndorsementEvent(vote interface{}) { 291 m.produce(m.ctx.NewConsensusEvent(eReceiveLockEndorsement, vote), 0) 292 } 293 294 // ProduceReceivePreCommitEndorsementEvent produces an eReceivePreCommitEndorsement event right away 295 func (m *ConsensusFSM) ProduceReceivePreCommitEndorsementEvent(vote interface{}) { 296 m.produce(m.ctx.NewConsensusEvent(eReceivePreCommitEndorsement, vote), 0) 297 } 298 299 func (m *ConsensusFSM) produceConsensusEvent(et fsm.EventType, delay time.Duration) { 300 m.produce(m.ctx.NewConsensusEvent(et, nil), delay) 301 } 302 303 // produce adds an event into the queue for the consensus FSM to process 304 func (m *ConsensusFSM) produce(evt *ConsensusEvent, delay time.Duration) { 305 if evt == nil { 306 return 307 } 308 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "produced").Inc() 309 if delay > 0 { 310 m.wg.Add(1) 311 go func() { 312 select { 313 case <-m.close: 314 case <-m.clock.After(delay): 315 m.evtq <- evt 316 } 317 m.wg.Done() 318 }() 319 } else { 320 m.evtq <- evt 321 } 322 } 323 324 func (m *ConsensusFSM) handle(evt *ConsensusEvent) error { 325 if m.ctx.IsStaleEvent(evt) { 326 m.ctx.Logger().Debug("stale event", zap.Any("event", evt.Type())) 327 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "stale").Inc() 328 return nil 329 } 330 if m.ctx.IsFutureEvent(evt) { 331 m.ctx.Logger().Debug("future event", zap.Any("event", evt.Type())) 332 // TODO: find a more appropriate delay 333 m.produce(evt, m.ctx.UnmatchedEventInterval(evt.Height())) 334 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "backoff").Inc() 335 return nil 336 } 337 src := m.fsm.CurrentState() 338 err := m.fsm.Handle(evt) 339 switch errors.Cause(err) { 340 case nil: 341 m.ctx.Logger().Debug( 342 "consensus state transition happens", 343 zap.String("src", string(src)), 344 zap.String("dst", string(m.fsm.CurrentState())), 345 zap.String("evt", string(evt.Type())), 346 ) 347 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "consumed").Inc() 348 case fsm.ErrTransitionNotFound: 349 if m.ctx.IsStaleUnmatchedEvent(evt) { 350 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "stale").Inc() 351 return nil 352 } 353 m.produce(evt, m.ctx.UnmatchedEventInterval(evt.Height())) 354 m.ctx.Logger().Debug( 355 "consensus state transition could find the match", 356 zap.String("src", string(src)), 357 zap.String("evt", string(evt.Type())), 358 zap.Error(err), 359 ) 360 _consensusEvtsMtc.WithLabelValues(string(evt.Type()), "backoff").Inc() 361 case ErrOldCalibrateEvt: 362 m.ctx.Logger().Debug( 363 "failed to handle eCalibrate, event height is less than current height", 364 zap.Error(err), 365 ) 366 default: 367 return errors.Wrapf( 368 err, 369 "failed to handle event %s with src %s", 370 string(evt.Type()), 371 string(src), 372 ) 373 } 374 return nil 375 } 376 377 func (m *ConsensusFSM) calibrate(evt fsm.Event) (fsm.State, error) { 378 cEvt, ok := evt.(*ConsensusEvent) 379 if !ok { 380 return sPrepare, errors.New("invalid fsm event") 381 } 382 height, ok := cEvt.Data().(uint64) 383 if !ok { 384 return sPrepare, errors.New("invalid data type") 385 } 386 consensusHeight := m.ctx.Height() 387 if consensusHeight > height { 388 return sPrepare, ErrOldCalibrateEvt 389 } 390 m.ctx.Logger().Debug( 391 "Calibrate consensus context", 392 zap.Uint64("consensusHeight", consensusHeight), 393 zap.Uint64("height", height), 394 ) 395 return m.BackToPrepare(0) 396 } 397 398 func (m *ConsensusFSM) prepare(evt fsm.Event) (fsm.State, error) { 399 if err := m.ctx.Prepare(); err != nil { 400 m.ctx.Logger().Error("Error during prepare", zap.Error(err)) 401 return m.BackToPrepare(0) 402 } 403 m.ctx.Logger().Debug("Start a new round") 404 proposal, err := m.ctx.Proposal() 405 if err != nil { 406 m.ctx.Logger().Error("failed to generate block proposal", zap.Error(err)) 407 return m.BackToPrepare(0) 408 } 409 410 overtime := m.ctx.WaitUntilRoundStart() 411 if proposal != nil { 412 m.ctx.Broadcast(proposal) 413 } 414 if !m.ctx.IsDelegate() { 415 return m.BackToPrepare(0) 416 } 417 if proposal != nil { 418 m.ProduceReceiveBlockEvent(proposal) 419 } 420 421 var h uint64 422 cEvt, ok := evt.(*ConsensusEvent) 423 if !ok { 424 m.ctx.Logger().Panic("failed to convert ConsensusEvent in prepare") 425 } 426 h = cEvt.Height() 427 ttl := m.ctx.AcceptBlockTTL(h) - overtime 428 // Setup timeouts 429 if preCommitEndorsement := m.ctx.PreCommitEndorsement(); preCommitEndorsement != nil { 430 cEvt := m.ctx.NewConsensusEvent(eBroadcastPreCommitEndorsement, preCommitEndorsement) 431 m.produce(cEvt, ttl) 432 ttl += m.ctx.AcceptProposalEndorsementTTL(cEvt.Height()) 433 m.produce(cEvt, ttl) 434 ttl += m.ctx.AcceptLockEndorsementTTL(cEvt.Height()) 435 m.produce(cEvt, ttl) 436 ttl += m.ctx.CommitTTL(cEvt.Height()) 437 m.produceConsensusEvent(eStopReceivingPreCommitEndorsement, ttl) 438 return sAcceptPreCommitEndorsement, nil 439 } 440 m.produceConsensusEvent(eFailedToReceiveBlock, ttl) 441 ttl += m.ctx.AcceptProposalEndorsementTTL(h) 442 m.produceConsensusEvent(eStopReceivingProposalEndorsement, ttl) 443 ttl += m.ctx.AcceptLockEndorsementTTL(h) 444 m.produceConsensusEvent(eStopReceivingLockEndorsement, ttl) 445 ttl += m.ctx.CommitTTL(h) 446 m.produceConsensusEvent(eStopReceivingPreCommitEndorsement, ttl) 447 return sAcceptBlockProposal, nil 448 } 449 450 func (m *ConsensusFSM) onReceiveBlock(evt fsm.Event) (fsm.State, error) { 451 m.ctx.Logger().Debug("Receive block") 452 cEvt, ok := evt.(*ConsensusEvent) 453 if !ok { 454 m.ctx.Logger().Error("invalid fsm event", zap.Any("event", evt)) 455 return sAcceptBlockProposal, nil 456 } 457 if err := m.processBlock(cEvt.Data()); err != nil { 458 m.ctx.Logger().Debug("Failed to generate proposal endorsement", zap.Error(err)) 459 return sAcceptBlockProposal, nil 460 } 461 462 return sAcceptProposalEndorsement, nil 463 } 464 465 func (m *ConsensusFSM) processBlock(block interface{}) error { 466 en, err := m.ctx.NewProposalEndorsement(block) 467 if err != nil { 468 return err 469 } 470 m.ProduceReceiveProposalEndorsementEvent(en) 471 m.ctx.Broadcast(en) 472 return nil 473 } 474 475 func (m *ConsensusFSM) onFailedToReceiveBlock(evt fsm.Event) (fsm.State, error) { 476 m.ctx.Logger().Warn("didn't receive the proposed block before timeout") 477 if err := m.processBlock(nil); err != nil { 478 m.ctx.Logger().Debug("Failed to generate proposal endorsement", zap.Error(err)) 479 } 480 481 return sAcceptProposalEndorsement, nil 482 } 483 484 func (m *ConsensusFSM) onReceiveProposalEndorsementInAcceptLockEndorsementState(evt fsm.Event) (fsm.State, error) { 485 return m.onReceiveProposalEndorsement(evt, sAcceptLockEndorsement) 486 } 487 488 func (m *ConsensusFSM) onReceiveProposalEndorsementInAcceptProposalEndorsementState(evt fsm.Event) (fsm.State, error) { 489 return m.onReceiveProposalEndorsement(evt, sAcceptProposalEndorsement) 490 } 491 492 func (m *ConsensusFSM) onReceiveProposalEndorsement(evt fsm.Event, currentState fsm.State) (fsm.State, error) { 493 cEvt, ok := evt.(*ConsensusEvent) 494 if !ok { 495 return currentState, errors.Wrap(ErrEvtCast, "failed to cast to consensus event") 496 } 497 lockEndorsement, err := m.ctx.NewLockEndorsement(cEvt.Data()) 498 if err != nil { 499 m.ctx.Logger().Debug("Failed to add proposal endorsement", zap.Error(err)) 500 return currentState, nil 501 } 502 if lockEndorsement == nil { 503 return currentState, nil 504 } 505 m.ProduceReceiveLockEndorsementEvent(lockEndorsement) 506 m.ctx.Broadcast(lockEndorsement) 507 508 return sAcceptLockEndorsement, err 509 } 510 511 func (m *ConsensusFSM) onStopReceivingProposalEndorsement(evt fsm.Event) (fsm.State, error) { 512 m.ctx.Logger().Warn("Not enough proposal endorsements") 513 514 return sAcceptLockEndorsement, nil 515 } 516 517 func (m *ConsensusFSM) onReceiveLockEndorsement(evt fsm.Event) (fsm.State, error) { 518 cEvt, ok := evt.(*ConsensusEvent) 519 if !ok { 520 return sAcceptLockEndorsement, errors.Wrap(ErrEvtCast, "failed to cast to consensus event") 521 } 522 preCommitEndorsement, err := m.ctx.NewPreCommitEndorsement(cEvt.Data()) 523 if err != nil { 524 return sAcceptLockEndorsement, err 525 } 526 if preCommitEndorsement == nil { 527 return sAcceptLockEndorsement, nil 528 } 529 m.ProduceReceivePreCommitEndorsementEvent(preCommitEndorsement) 530 m.ctx.Broadcast(preCommitEndorsement) 531 532 return sAcceptPreCommitEndorsement, nil 533 } 534 535 func (m *ConsensusFSM) onBroadcastPreCommitEndorsement(evt fsm.Event) (fsm.State, error) { 536 cEvt, ok := evt.(*ConsensusEvent) 537 if !ok { 538 return sAcceptPreCommitEndorsement, errors.Wrap(ErrEvtCast, "failed to cast to consensus event") 539 } 540 m.ctx.Logger().Debug("broadcast pre-commit endorsement") 541 m.ctx.Broadcast(cEvt.Data()) 542 543 return sAcceptPreCommitEndorsement, nil 544 } 545 546 func (m *ConsensusFSM) onStopReceivingLockEndorsement(evt fsm.Event) (fsm.State, error) { 547 m.ctx.Logger().Warn("Not enough lock endorsements") 548 549 return m.BackToPrepare(0) 550 } 551 552 func (m *ConsensusFSM) onReceivePreCommitEndorsement(evt fsm.Event) (fsm.State, error) { 553 cEvt, ok := evt.(*ConsensusEvent) 554 if !ok { 555 return sAcceptPreCommitEndorsement, errors.Wrap(ErrEvtCast, "failed to cast to consensus event") 556 } 557 committed, err := m.ctx.Commit(cEvt.Data()) 558 if err != nil || !committed { 559 return sAcceptPreCommitEndorsement, err 560 } 561 return m.BackToPrepare(0) 562 } 563 564 func (m *ConsensusFSM) onStopReceivingPreCommitEndorsement(evt fsm.Event) (fsm.State, error) { 565 m.ctx.Logger().Warn("Not enough pre-commit endorsements") 566 567 return m.BackToPrepare(0) 568 } 569 570 // handleBackdoorEvt takes the dst state from the event and move the FSM into it 571 func (m *ConsensusFSM) handleBackdoorEvt(evt fsm.Event) (fsm.State, error) { 572 cEvt, ok := evt.(*ConsensusEvent) 573 if !ok { 574 return sPrepare, errors.Wrap(ErrEvtCast, "the event is not a backdoor event") 575 } 576 dst, ok := cEvt.Data().(fsm.State) 577 if !ok { 578 return sPrepare, errors.Wrap(ErrEvtCast, "the data is not a fsm.State") 579 } 580 581 return dst, nil 582 }