github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/eventhandler/event_handler.go (about) 1 package eventhandler 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/rs/zerolog" 10 11 "github.com/onflow/flow-go/consensus/hotstuff" 12 "github.com/onflow/flow-go/consensus/hotstuff/model" 13 "github.com/onflow/flow-go/model/flow" 14 ) 15 16 // EventHandler is the main handler for individual events that trigger state transition. 17 // It exposes API to handle one event at a time synchronously. EventHandler is *not concurrency safe*. 18 // Please use the EventLoop to ensure that only a single go-routine executes the EventHandler's algorithms. 19 // EventHandler is implemented in event-driven way, it reacts to incoming events and performs certain actions. 20 // It doesn't perform any actions on its own. There are 3 main responsibilities of EventHandler, vote, propose, 21 // timeout. There are specific scenarios that lead to each of those actions. 22 // - create vote: voting logic is triggered by OnReceiveProposal, after receiving proposal we have all required information 23 // to create a valid vote. Compliance engine makes sure that we receive proposals, whose parents are known. 24 // Creating a vote can be triggered ONLY by receiving proposal. 25 // - create timeout: creating model.TimeoutObject[TO] is triggered by OnLocalTimeout, after reaching deadline for current round. 26 // EventHandler gets notified about it and has to create a model.TimeoutObject and broadcast it to other replicas. 27 // Creating a TO can be triggered by reaching round deadline or triggered as part of Bracha broadcast when superminority 28 // of replicas have contributed to TC creation and created a partial TC. 29 // - create a proposal: proposing logic is more complicated. Creating a proposal is triggered by the EventHandler receiving 30 // a QC or TC that induces a view change to a view where the replica is primary. As an edge case, the EventHandler 31 // can receive a QC or TC that triggers the view change, but we can't create a proposal in case we are missing parent block the newest QC refers to. 32 // In case we already have the QC, but are still missing the respective parent, OnReceiveProposal can trigger the proposing logic 33 // as well, but only when receiving proposal for view lower than active view. 34 // To summarize, to make a valid proposal for view N we need to have a QC or TC for N-1 and know the proposal with blockID 35 // NewestQC.BlockID. 36 // 37 // Not concurrency safe. 38 type EventHandler struct { 39 log zerolog.Logger 40 paceMaker hotstuff.PaceMaker 41 blockProducer hotstuff.BlockProducer 42 forks hotstuff.Forks 43 persist hotstuff.Persister 44 committee hotstuff.Replicas 45 safetyRules hotstuff.SafetyRules 46 notifier hotstuff.Consumer 47 } 48 49 var _ hotstuff.EventHandler = (*EventHandler)(nil) 50 51 // NewEventHandler creates an EventHandler instance with initial components. 52 func NewEventHandler( 53 log zerolog.Logger, 54 paceMaker hotstuff.PaceMaker, 55 blockProducer hotstuff.BlockProducer, 56 forks hotstuff.Forks, 57 persist hotstuff.Persister, 58 committee hotstuff.Replicas, 59 safetyRules hotstuff.SafetyRules, 60 notifier hotstuff.Consumer, 61 ) (*EventHandler, error) { 62 e := &EventHandler{ 63 log: log.With().Str("hotstuff", "participant").Logger(), 64 paceMaker: paceMaker, 65 blockProducer: blockProducer, 66 forks: forks, 67 persist: persist, 68 safetyRules: safetyRules, 69 committee: committee, 70 notifier: notifier, 71 } 72 return e, nil 73 } 74 75 // OnReceiveQc processes a valid qc constructed by internal vote aggregator or discovered in TimeoutObject. 76 // All inputs should be validated before feeding into this function. Assuming trusted data. 77 // No errors are expected during normal operation. 78 func (e *EventHandler) OnReceiveQc(qc *flow.QuorumCertificate) error { 79 curView := e.paceMaker.CurView() 80 log := e.log.With(). 81 Uint64("cur_view", curView). 82 Uint64("qc_view", qc.View). 83 Hex("qc_block_id", qc.BlockID[:]). 84 Logger() 85 log.Debug().Msg("received QC") 86 e.notifier.OnReceiveQc(curView, qc) 87 defer e.notifier.OnEventProcessed() 88 89 newViewEvent, err := e.paceMaker.ProcessQC(qc) 90 if err != nil { 91 return fmt.Errorf("could not process QC: %w", err) 92 } 93 if newViewEvent == nil { 94 log.Debug().Msg("QC didn't trigger view change, nothing to do") 95 return nil 96 } 97 98 // current view has changed, go to new view 99 log.Debug().Msg("QC triggered view change, starting new view now") 100 return e.proposeForNewViewIfPrimary() 101 } 102 103 // OnReceiveTc processes a valid tc constructed by internal timeout aggregator, discovered in TimeoutObject or 104 // broadcast over the network. 105 // All inputs should be validated before feeding into this function. Assuming trusted data. 106 // No errors are expected during normal operation. 107 func (e *EventHandler) OnReceiveTc(tc *flow.TimeoutCertificate) error { 108 curView := e.paceMaker.CurView() 109 log := e.log.With(). 110 Uint64("cur_view", curView). 111 Uint64("tc_view", tc.View). 112 Uint64("tc_newest_qc_view", tc.NewestQC.View). 113 Hex("tc_newest_qc_block_id", tc.NewestQC.BlockID[:]). 114 Logger() 115 log.Debug().Msg("received TC") 116 e.notifier.OnReceiveTc(curView, tc) 117 defer e.notifier.OnEventProcessed() 118 119 newViewEvent, err := e.paceMaker.ProcessTC(tc) 120 if err != nil { 121 return fmt.Errorf("could not process TC for view %d: %w", tc.View, err) 122 } 123 if newViewEvent == nil { 124 log.Debug().Msg("TC didn't trigger view change, nothing to do") 125 return nil 126 } 127 128 // current view has changed, go to new view 129 log.Debug().Msg("TC triggered view change, starting new view now") 130 return e.proposeForNewViewIfPrimary() 131 } 132 133 // OnReceiveProposal processes a block proposal received from another HotStuff 134 // consensus participant. 135 // All inputs should be validated before feeding into this function. Assuming trusted data. 136 // No errors are expected during normal operation. 137 func (e *EventHandler) OnReceiveProposal(proposal *model.Proposal) error { 138 block := proposal.Block 139 curView := e.paceMaker.CurView() 140 log := e.log.With(). 141 Uint64("cur_view", curView). 142 Uint64("block_view", block.View). 143 Hex("block_id", block.BlockID[:]). 144 Uint64("qc_view", block.QC.View). 145 Hex("proposer_id", block.ProposerID[:]). 146 Logger() 147 log.Debug().Msg("proposal received from compliance engine") 148 e.notifier.OnReceiveProposal(curView, proposal) 149 defer e.notifier.OnEventProcessed() 150 151 // ignore stale proposals 152 if block.View < e.forks.FinalizedView() { 153 log.Debug().Msg("stale proposal") 154 return nil 155 } 156 157 // store the block. 158 err := e.forks.AddValidatedBlock(block) 159 if err != nil { 160 return fmt.Errorf("cannot add proposal to forks (%x): %w", block.BlockID, err) 161 } 162 163 _, err = e.paceMaker.ProcessQC(proposal.Block.QC) 164 if err != nil { 165 return fmt.Errorf("could not process QC for block %x: %w", block.BlockID, err) 166 } 167 168 _, err = e.paceMaker.ProcessTC(proposal.LastViewTC) 169 if err != nil { 170 return fmt.Errorf("could not process TC for block %x: %w", block.BlockID, err) 171 } 172 173 // if the block is for the current view, then try voting for this block 174 err = e.processBlockForCurrentView(proposal) 175 if err != nil { 176 return fmt.Errorf("failed processing current block: %w", err) 177 } 178 log.Debug().Msg("proposal processed from compliance engine") 179 180 // nothing to do if this proposal is for current view 181 if proposal.Block.View == e.paceMaker.CurView() { 182 return nil 183 } 184 185 return e.proposeForNewViewIfPrimary() 186 } 187 188 // TimeoutChannel returns the channel for subscribing the waiting timeout on receiving 189 // block or votes for the current view. 190 func (e *EventHandler) TimeoutChannel() <-chan time.Time { 191 return e.paceMaker.TimeoutChannel() 192 } 193 194 // OnLocalTimeout handles a local timeout event by creating a model.TimeoutObject and broadcasting it. 195 // No errors are expected during normal operation. 196 func (e *EventHandler) OnLocalTimeout() error { 197 curView := e.paceMaker.CurView() 198 e.log.Debug().Uint64("cur_view", curView).Msg("timeout received from event loop") 199 e.notifier.OnLocalTimeout(curView) 200 defer e.notifier.OnEventProcessed() 201 202 err := e.broadcastTimeoutObjectIfAuthorized() 203 if err != nil { 204 return fmt.Errorf("unexpected exception while processing timeout in view %d: %w", curView, err) 205 } 206 return nil 207 } 208 209 // OnPartialTcCreated handles notification produces by the internal timeout aggregator. 210 // If the notification is for the current view, a corresponding model.TimeoutObject is broadcast to the consensus committee. 211 // No errors are expected during normal operation. 212 func (e *EventHandler) OnPartialTcCreated(partialTC *hotstuff.PartialTcCreated) error { 213 curView := e.paceMaker.CurView() 214 lastViewTC := partialTC.LastViewTC 215 logger := e.log.With(). 216 Uint64("cur_view", curView). 217 Uint64("qc_view", partialTC.NewestQC.View) 218 if lastViewTC != nil { 219 logger.Uint64("last_view_tc_view", lastViewTC.View) 220 } 221 log := logger.Logger() 222 log.Debug().Msg("constructed partial TC") 223 224 e.notifier.OnPartialTc(curView, partialTC) 225 defer e.notifier.OnEventProcessed() 226 227 // process QC, this might trigger view change 228 _, err := e.paceMaker.ProcessQC(partialTC.NewestQC) 229 if err != nil { 230 return fmt.Errorf("could not process newest QC: %w", err) 231 } 232 233 // process TC, this might trigger view change 234 _, err = e.paceMaker.ProcessTC(lastViewTC) 235 if err != nil { 236 return fmt.Errorf("could not process TC for view %d: %w", lastViewTC.View, err) 237 } 238 239 // NOTE: in other cases when we have observed a view change we will trigger proposing logic, this is desired logic 240 // for handling proposal, QC and TC. However, observing a partial TC means 241 // that superminority have timed out and there was at least one honest replica in that set. Honest replicas will never vote 242 // after timing out for current view meaning we won't be able to collect supermajority of votes for a proposal made after 243 // observing partial TC. 244 245 // by definition, we are allowed to produce timeout object if we have received partial TC for current view 246 if e.paceMaker.CurView() != partialTC.View { 247 return nil 248 } 249 250 log.Debug().Msg("partial TC generated for current view, broadcasting timeout") 251 err = e.broadcastTimeoutObjectIfAuthorized() 252 if err != nil { 253 return fmt.Errorf("unexpected exception while processing partial TC in view %d: %w", partialTC.View, err) 254 } 255 return nil 256 } 257 258 // Start starts the event handler. 259 // No errors are expected during normal operation. 260 // CAUTION: EventHandler is not concurrency safe. The Start method must 261 // be executed by the same goroutine that also calls the other business logic 262 // methods, or concurrency safety has to be implemented externally. 263 func (e *EventHandler) Start(ctx context.Context) error { 264 e.notifier.OnStart(e.paceMaker.CurView()) 265 defer e.notifier.OnEventProcessed() 266 e.paceMaker.Start(ctx) 267 err := e.proposeForNewViewIfPrimary() 268 if err != nil { 269 return fmt.Errorf("could not start new view: %w", err) 270 } 271 return nil 272 } 273 274 // broadcastTimeoutObjectIfAuthorized attempts to generate a model.TimeoutObject, adds it 275 // to `timeoutAggregator` and broadcasts it to the consensus commettee. We check, whether 276 // this node, at the current view, is part of the consensus committee. Otherwise, this 277 // method is functionally a no-op. 278 // For example, right after an epoch switchover a consensus node might still be online but 279 // not part of the _active_ consensus committee anymore. Consequently, it should not broadcast 280 // timeouts anymore. 281 // No errors are expected during normal operation. 282 func (e *EventHandler) broadcastTimeoutObjectIfAuthorized() error { 283 curView := e.paceMaker.CurView() 284 newestQC := e.paceMaker.NewestQC() 285 lastViewTC := e.paceMaker.LastViewTC() 286 log := e.log.With().Uint64("cur_view", curView).Logger() 287 288 if newestQC.View+1 == curView { 289 // in case last view has ended with QC and TC, make sure that only QC is included 290 // otherwise such timeout is invalid. This case is possible if TC has included QC with the same 291 // view as the TC itself, meaning that newestQC.View == lastViewTC.View 292 lastViewTC = nil 293 } 294 295 timeout, err := e.safetyRules.ProduceTimeout(curView, newestQC, lastViewTC) 296 if err != nil { 297 if model.IsNoTimeoutError(err) { 298 log.Warn().Err(err).Msgf("not generating timeout as this node is not part of the active committee") 299 return nil 300 } 301 return fmt.Errorf("could not produce timeout: %w", err) 302 } 303 304 // raise a notification to broadcast timeout 305 e.notifier.OnOwnTimeout(timeout) 306 log.Debug().Msg("broadcast TimeoutObject done") 307 308 return nil 309 } 310 311 // proposeForNewViewIfPrimary will only be called when we may be able to propose a block, after processing a new event. 312 // - after entering a new view as a result of processing a QC or TC, then we may propose for the newly entered view 313 // - after receiving a proposal (but not changing view), if that proposal is referenced by our highest known QC, 314 // and the proposal was previously unknown, then we can propose a block in the current view 315 // 316 // It reads the current view, and generates a proposal if we are the leader. 317 // No errors are expected during normal operation. 318 func (e *EventHandler) proposeForNewViewIfPrimary() error { 319 start := time.Now() // track the start time 320 curView := e.paceMaker.CurView() 321 currentLeader, err := e.committee.LeaderForView(curView) 322 if err != nil { 323 return fmt.Errorf("failed to determine primary for new view %d: %w", curView, err) 324 } 325 finalizedView := e.forks.FinalizedView() 326 log := e.log.With(). 327 Uint64("cur_view", curView). 328 Uint64("finalized_view", finalizedView). 329 Hex("leader_id", currentLeader[:]).Logger() 330 331 e.notifier.OnCurrentViewDetails(curView, finalizedView, currentLeader) 332 333 // check that I am the primary for this view and that I haven't already proposed; otherwise there is nothing to do 334 if e.committee.Self() != currentLeader { 335 return nil 336 } 337 for _, b := range e.forks.GetBlocksForView(curView) { // on the happy path, this slice is empty 338 if b.ProposerID == e.committee.Self() { 339 log.Debug().Msg("already proposed for current view") 340 return nil 341 } else { 342 // sanity check: the following code should never be reached, as this node is the current leader, i.e. 343 // we should _not_ consider a proposal for this view from any other as valid and store it in forks. 344 return fmt.Errorf("this node (%v) is leader for the current view %d, but have a proposal from node %v for this view", currentLeader, curView, b.ProposerID) 345 } 346 } 347 348 // attempt to generate proposal: 349 newestQC := e.paceMaker.NewestQC() 350 lastViewTC := e.paceMaker.LastViewTC() 351 352 _, found := e.forks.GetBlock(newestQC.BlockID) 353 if !found { 354 // we don't know anything about block referenced by our newest QC, in this case we can't 355 // create a valid proposal since we can't guarantee validity of block payload. 356 log.Warn(). 357 Uint64("qc_view", newestQC.View). 358 Hex("block_id", newestQC.BlockID[:]).Msg("haven't synced the latest block yet; can't propose") 359 return nil 360 } 361 log.Debug().Msg("generating proposal as leader") 362 363 // Sanity checks to make sure that resulting proposal is valid: 364 // In its proposal, the leader for view N needs to present evidence that it has legitimately entered view N. 365 // As evidence, we include a QC or TC for view N-1, which should always be available as the PaceMaker advances 366 // to view N only after observing a QC or TC from view N-1. Moreover, QC and TC are always processed together. As 367 // EventHandler is strictly single-threaded without reentrancy, we must have a QC or TC for the prior view (curView-1). 368 // Failing one of these sanity checks is a symptom of state corruption or a severe implementation bug. 369 if newestQC.View+1 != curView { 370 if lastViewTC == nil { 371 return fmt.Errorf("possible state corruption, expected lastViewTC to be not nil") 372 } 373 if lastViewTC.View+1 != curView { 374 return fmt.Errorf("possible state corruption, don't have QC(view=%d) and TC(view=%d) for previous view(currentView=%d)", 375 newestQC.View, lastViewTC.View, curView) 376 } 377 } else { 378 // In case last view has ended with QC and TC, make sure that only QC is included, 379 // otherwise such proposal is invalid. This case is possible if TC has included QC with the same 380 // view as the TC itself, meaning that newestQC.View == lastViewTC.View 381 lastViewTC = nil 382 } 383 384 flowProposal, err := e.blockProducer.MakeBlockProposal(curView, newestQC, lastViewTC) 385 if err != nil { 386 return fmt.Errorf("can not make block proposal for curView %v: %w", curView, err) 387 } 388 proposedBlock := model.BlockFromFlow(flowProposal) // turn the signed flow header into a proposal 389 390 // determine target publication time 391 // CAUTION: 392 // • we must call `TargetPublicationTime` _before_ `AddValidatedBlock`, because `AddValidatedBlock` 393 // may emit a BlockIncorporatedEvent, which changes CruiseControl's state. 394 // • metrics for the PaceMaker/CruiseControl assume that the event handler is the only caller of 395 // `TargetPublicationTime`. Technically, `TargetPublicationTime` records the publication delay 396 // relative to its _latest_ call. 397 targetPublicationTime := e.paceMaker.TargetPublicationTime(flowProposal.View, start, flowProposal.ParentID) 398 399 // we want to store created proposal in forks to make sure that we don't create more proposals for 400 // current view. Due to asynchronous nature of our design it's possible that after creating proposal 401 // we will be asked to propose again for same view. 402 err = e.forks.AddValidatedBlock(proposedBlock) 403 if err != nil { 404 return fmt.Errorf("could not add newly created proposal (%v): %w", proposedBlock.BlockID, err) 405 } 406 407 log.Debug(). 408 Uint64("block_view", proposedBlock.View). 409 Time("target_publication", targetPublicationTime). 410 Hex("block_id", proposedBlock.BlockID[:]). 411 Uint64("parent_view", newestQC.View). 412 Hex("parent_id", newestQC.BlockID[:]). 413 Hex("signer", proposedBlock.ProposerID[:]). 414 Msg("forwarding proposal to communicator for broadcasting") 415 416 // raise a notification with proposal (also triggers broadcast) 417 e.notifier.OnOwnProposal(flowProposal, targetPublicationTime) 418 return nil 419 } 420 421 // processBlockForCurrentView processes the block for the current view. 422 // It is called AFTER the block has been stored or found in Forks 423 // It checks whether to vote for this block. 424 // No errors are expected during normal operation. 425 func (e *EventHandler) processBlockForCurrentView(proposal *model.Proposal) error { 426 // sanity check that block is really for the current view: 427 curView := e.paceMaker.CurView() 428 block := proposal.Block 429 if block.View != curView { 430 // ignore outdated proposals in case we have moved forward 431 return nil 432 } 433 // leader (node ID) for next view 434 nextLeader, err := e.committee.LeaderForView(curView + 1) 435 if errors.Is(err, model.ErrViewForUnknownEpoch) { 436 // We are attempting process a block in an unknown epoch 437 // This should never happen, because: 438 // * the compliance layer ensures proposals are passed to the event loop strictly after their parent 439 // * the protocol state ensures that, before incorporating the first block of an epoch E, 440 // either E is known or we have triggered epoch fallback mode - in either case the epoch for the 441 // current epoch is known 442 return fmt.Errorf("attempting to process a block for current view in unknown epoch") 443 } 444 if err != nil { 445 return fmt.Errorf("failed to determine primary for next view %d: %w", curView+1, err) 446 } 447 448 // safetyRules performs all the checks to decide whether to vote for this block or not. 449 err = e.ownVote(proposal, curView, nextLeader) 450 if err != nil { 451 return fmt.Errorf("unexpected error in voting logic: %w", err) 452 } 453 454 return nil 455 } 456 457 // ownVote generates and forwards the own vote, if we decide to vote. 458 // Any errors are potential symptoms of uncovered edge cases or corrupted internal state (fatal). 459 // No errors are expected during normal operation. 460 func (e *EventHandler) ownVote(proposal *model.Proposal, curView uint64, nextLeader flow.Identifier) error { 461 block := proposal.Block 462 log := e.log.With(). 463 Uint64("block_view", block.View). 464 Hex("block_id", block.BlockID[:]). 465 Uint64("parent_view", block.QC.View). 466 Hex("parent_id", block.QC.BlockID[:]). 467 Hex("signer", block.ProposerID[:]). 468 Logger() 469 470 _, found := e.forks.GetBlock(proposal.Block.QC.BlockID) 471 if !found { 472 // we don't have parent for this proposal, we can't vote since we can't guarantee validity of proposals 473 // payload. Strictly speaking this shouldn't ever happen because compliance engine makes sure that we 474 // receive proposals with valid parents. 475 return fmt.Errorf("won't vote for proposal, no parent block for this proposal") 476 } 477 478 // safetyRules performs all the checks to decide whether to vote for this block or not. 479 ownVote, err := e.safetyRules.ProduceVote(proposal, curView) 480 if err != nil { 481 if !model.IsNoVoteError(err) { 482 // unknown error, exit the event loop 483 return fmt.Errorf("could not produce vote: %w", err) 484 } 485 log.Debug().Err(err).Msg("should not vote for this block") 486 return nil 487 } 488 489 log.Debug().Msg("forwarding vote to compliance engine") 490 // raise a notification to send vote 491 e.notifier.OnOwnVote(ownVote.BlockID, ownVote.View, ownVote.SigData, nextLeader) 492 return nil 493 }