github.com/onflow/flow-go@v0.33.17/consensus/hotstuff/eventloop/event_loop.go (about) 1 package eventloop 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 "github.com/rs/zerolog" 9 10 "github.com/onflow/flow-go/consensus/hotstuff" 11 "github.com/onflow/flow-go/consensus/hotstuff/model" 12 "github.com/onflow/flow-go/consensus/hotstuff/tracker" 13 "github.com/onflow/flow-go/engine" 14 "github.com/onflow/flow-go/model/flow" 15 "github.com/onflow/flow-go/module" 16 "github.com/onflow/flow-go/module/component" 17 "github.com/onflow/flow-go/module/irrecoverable" 18 "github.com/onflow/flow-go/module/metrics" 19 ) 20 21 // queuedProposal is a helper structure that is used to transmit proposal in channel 22 // it contains an attached insertionTime that is used to measure how long we have waited between queening proposal and 23 // actually processing by `EventHandler`. 24 type queuedProposal struct { 25 proposal *model.Proposal 26 insertionTime time.Time 27 } 28 29 // EventLoop buffers all incoming events to the hotstuff EventHandler, and feeds EventHandler one event at a time. 30 type EventLoop struct { 31 *component.ComponentManager 32 log zerolog.Logger 33 eventHandler hotstuff.EventHandler 34 metrics module.HotstuffMetrics 35 mempoolMetrics module.MempoolMetrics 36 proposals chan queuedProposal 37 newestSubmittedTc *tracker.NewestTCTracker 38 newestSubmittedQc *tracker.NewestQCTracker 39 newestSubmittedPartialTc *tracker.NewestPartialTcTracker 40 tcSubmittedNotifier engine.Notifier 41 qcSubmittedNotifier engine.Notifier 42 partialTcCreatedNotifier engine.Notifier 43 startTime time.Time 44 } 45 46 var _ hotstuff.EventLoop = (*EventLoop)(nil) 47 var _ component.Component = (*EventLoop)(nil) 48 49 // NewEventLoop creates an instance of EventLoop. 50 func NewEventLoop( 51 log zerolog.Logger, 52 metrics module.HotstuffMetrics, 53 mempoolMetrics module.MempoolMetrics, 54 eventHandler hotstuff.EventHandler, 55 startTime time.Time, 56 ) (*EventLoop, error) { 57 // we will use a buffered channel to avoid blocking of caller 58 // we can't afford to drop messages since it undermines liveness, but we also want to avoid blocking of compliance 59 // engine. We assume that we should be able to process proposals faster than compliance engine feeds them, worst case 60 // we will fill the buffer and block compliance engine worker but that should happen only if compliance engine receives 61 // large number of blocks in short period of time(when catching up for instance). 62 proposals := make(chan queuedProposal, 1000) 63 64 el := &EventLoop{ 65 log: log, 66 eventHandler: eventHandler, 67 metrics: metrics, 68 mempoolMetrics: mempoolMetrics, 69 proposals: proposals, 70 tcSubmittedNotifier: engine.NewNotifier(), 71 qcSubmittedNotifier: engine.NewNotifier(), 72 partialTcCreatedNotifier: engine.NewNotifier(), 73 newestSubmittedTc: tracker.NewNewestTCTracker(), 74 newestSubmittedQc: tracker.NewNewestQCTracker(), 75 newestSubmittedPartialTc: tracker.NewNewestPartialTcTracker(), 76 startTime: startTime, 77 } 78 79 componentBuilder := component.NewComponentManagerBuilder() 80 componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 81 ready() 82 83 // launch when scheduled by el.startTime 84 el.log.Info().Msgf("event loop will start at: %v", startTime) 85 select { 86 case <-ctx.Done(): 87 return 88 case <-time.After(time.Until(startTime)): 89 el.log.Info().Msgf("starting event loop") 90 err := el.loop(ctx) 91 if err != nil { 92 el.log.Error().Err(err).Msg("irrecoverable event loop error") 93 ctx.Throw(err) 94 } 95 } 96 }) 97 el.ComponentManager = componentBuilder.Build() 98 99 return el, nil 100 } 101 102 // loop executes the core HotStuff logic in a single thread. It picks inputs from the various 103 // inbound channels and executes the EventHandler's respective method for processing this input. 104 // During normal operations, the EventHandler is not expected to return any errors, as all inputs 105 // are assumed to be fully validated (or produced by trusted components within the node). Therefore, 106 // any error is a symptom of state corruption, bugs or violation of API contracts. In all cases, 107 // continuing operations is not an option, i.e. we exit the event loop and return an exception. 108 func (el *EventLoop) loop(ctx context.Context) error { 109 err := el.eventHandler.Start(ctx) // must be called by the same go-routine that also executes the business logic! 110 if err != nil { 111 return fmt.Errorf("could not start event handler: %w", err) 112 } 113 114 shutdownSignaled := ctx.Done() 115 timeoutCertificates := el.tcSubmittedNotifier.Channel() 116 quorumCertificates := el.qcSubmittedNotifier.Channel() 117 partialTCs := el.partialTcCreatedNotifier.Channel() 118 119 for { 120 // Giving timeout events the priority to be processed first. 121 // This is to prevent attacks from malicious nodes that attempt 122 // to block honest nodes' pacemaker from progressing by sending 123 // other events. 124 timeoutChannel := el.eventHandler.TimeoutChannel() 125 126 // the first select makes sure we process timeouts with priority 127 select { 128 129 // if we receive the shutdown signal, exit the loop 130 case <-shutdownSignaled: 131 return nil 132 133 // processing timeout or partial TC event are top priority since 134 // they allow node to contribute to TC aggregation when replicas can't 135 // make progress on happy path 136 case <-timeoutChannel: 137 138 processStart := time.Now() 139 err = el.eventHandler.OnLocalTimeout() 140 if err != nil { 141 return fmt.Errorf("could not process timeout: %w", err) 142 } 143 // measure how long it takes for a timeout event to be processed 144 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeLocalTimeout) 145 146 // At this point, we have received and processed an event from the timeout channel. 147 // A timeout also means that we have made progress. A new timeout will have 148 // been started and el.eventHandler.TimeoutChannel() will be a NEW channel (for the just-started timeout). 149 // Very important to start the for loop from the beginning, to continue the with the new timeout channel! 150 continue 151 152 case <-partialTCs: 153 154 processStart := time.Now() 155 err = el.eventHandler.OnPartialTcCreated(el.newestSubmittedPartialTc.NewestPartialTc()) 156 if err != nil { 157 return fmt.Errorf("could no process partial created TC event: %w", err) 158 } 159 // measure how long it takes for a partial TC to be processed 160 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnPartialTc) 161 162 // At this point, we have received and processed partial TC event, it could have resulted in several scenarios: 163 // 1. a view change with potential voting or proposal creation 164 // 2. a created and broadcast timeout object 165 // 3. QC and TC didn't result in view change and no timeout was created since we have already timed out or 166 // the partial TC was created for view different from current one. 167 continue 168 169 default: 170 // fall through to non-priority events 171 } 172 173 idleStart := time.Now() 174 175 // select for block headers/QCs here 176 select { 177 178 // same as before 179 case <-shutdownSignaled: 180 return nil 181 182 // same as before 183 case <-timeoutChannel: 184 // measure how long the event loop was idle waiting for an 185 // incoming event 186 el.metrics.HotStuffIdleDuration(time.Since(idleStart)) 187 188 processStart := time.Now() 189 err = el.eventHandler.OnLocalTimeout() 190 if err != nil { 191 return fmt.Errorf("could not process timeout: %w", err) 192 } 193 // measure how long it takes for a timeout event to be processed 194 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeLocalTimeout) 195 196 // if we have a new proposal, process it 197 case queuedItem := <-el.proposals: 198 // the wait duration is measured as how long it takes from a block being 199 // received to event handler commencing the processing of the block 200 el.metrics.HotStuffWaitDuration(time.Since(queuedItem.insertionTime), metrics.HotstuffEventTypeOnProposal) 201 202 // measure how long the event loop was idle waiting for an 203 // incoming event 204 el.metrics.HotStuffIdleDuration(time.Since(idleStart)) 205 206 processStart := time.Now() 207 proposal := queuedItem.proposal 208 err = el.eventHandler.OnReceiveProposal(proposal) 209 if err != nil { 210 return fmt.Errorf("could not process proposal %v: %w", proposal.Block.BlockID, err) 211 } 212 // measure how long it takes for a proposal to be processed 213 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnProposal) 214 215 el.log.Info(). 216 Dur("dur_ms", time.Since(processStart)). 217 Uint64("view", proposal.Block.View). 218 Hex("block_id", proposal.Block.BlockID[:]). 219 Msg("block proposal has been processed successfully") 220 221 // if we have a new QC, process it 222 case <-quorumCertificates: 223 // measure how long the event loop was idle waiting for an 224 // incoming event 225 el.metrics.HotStuffIdleDuration(time.Since(idleStart)) 226 227 processStart := time.Now() 228 err = el.eventHandler.OnReceiveQc(el.newestSubmittedQc.NewestQC()) 229 if err != nil { 230 return fmt.Errorf("could not process QC: %w", err) 231 } 232 // measure how long it takes for a QC to be processed 233 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnQC) 234 235 // if we have a new TC, process it 236 case <-timeoutCertificates: 237 // measure how long the event loop was idle waiting for an 238 // incoming event 239 el.metrics.HotStuffIdleDuration(time.Since(idleStart)) 240 241 processStart := time.Now() 242 err = el.eventHandler.OnReceiveTc(el.newestSubmittedTc.NewestTC()) 243 if err != nil { 244 return fmt.Errorf("could not process TC: %w", err) 245 } 246 // measure how long it takes for a TC to be processed 247 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnTC) 248 249 case <-partialTCs: 250 // measure how long the event loop was idle waiting for an 251 // incoming event 252 el.metrics.HotStuffIdleDuration(time.Since(idleStart)) 253 254 processStart := time.Now() 255 err = el.eventHandler.OnPartialTcCreated(el.newestSubmittedPartialTc.NewestPartialTc()) 256 if err != nil { 257 return fmt.Errorf("could no process partial created TC event: %w", err) 258 } 259 // measure how long it takes for a partial TC to be processed 260 el.metrics.HotStuffBusyDuration(time.Since(processStart), metrics.HotstuffEventTypeOnPartialTc) 261 } 262 } 263 } 264 265 // SubmitProposal pushes the received block to the proposals channel 266 func (el *EventLoop) SubmitProposal(proposal *model.Proposal) { 267 queueItem := queuedProposal{ 268 proposal: proposal, 269 insertionTime: time.Now(), 270 } 271 select { 272 case el.proposals <- queueItem: 273 case <-el.ComponentManager.ShutdownSignal(): 274 return 275 } 276 el.mempoolMetrics.MempoolEntries(metrics.HotstuffEventTypeOnProposal, uint(len(el.proposals))) 277 } 278 279 // onTrustedQC pushes the received QC(which MUST be validated) to the quorumCertificates channel 280 func (el *EventLoop) onTrustedQC(qc *flow.QuorumCertificate) { 281 if el.newestSubmittedQc.Track(qc) { 282 el.qcSubmittedNotifier.Notify() 283 } 284 } 285 286 // onTrustedTC pushes the received TC(which MUST be validated) to the timeoutCertificates channel 287 func (el *EventLoop) onTrustedTC(tc *flow.TimeoutCertificate) { 288 if el.newestSubmittedTc.Track(tc) { 289 el.tcSubmittedNotifier.Notify() 290 } else if el.newestSubmittedQc.Track(tc.NewestQC) { 291 el.qcSubmittedNotifier.Notify() 292 } 293 } 294 295 // OnTcConstructedFromTimeouts pushes the received TC to the timeoutCertificates channel 296 func (el *EventLoop) OnTcConstructedFromTimeouts(tc *flow.TimeoutCertificate) { 297 el.onTrustedTC(tc) 298 } 299 300 // OnPartialTcCreated created a hotstuff.PartialTcCreated payload and pushes it into partialTcCreated buffered channel for 301 // further processing by EventHandler. Since we use buffered channel this function can block if buffer is full. 302 func (el *EventLoop) OnPartialTcCreated(view uint64, newestQC *flow.QuorumCertificate, lastViewTC *flow.TimeoutCertificate) { 303 event := &hotstuff.PartialTcCreated{ 304 View: view, 305 NewestQC: newestQC, 306 LastViewTC: lastViewTC, 307 } 308 if el.newestSubmittedPartialTc.Track(event) { 309 el.partialTcCreatedNotifier.Notify() 310 } 311 } 312 313 // OnNewQcDiscovered pushes already validated QCs that were submitted from TimeoutAggregator to the event handler 314 func (el *EventLoop) OnNewQcDiscovered(qc *flow.QuorumCertificate) { 315 el.onTrustedQC(qc) 316 } 317 318 // OnNewTcDiscovered pushes already validated TCs that were submitted from TimeoutAggregator to the event handler 319 func (el *EventLoop) OnNewTcDiscovered(tc *flow.TimeoutCertificate) { 320 el.onTrustedTC(tc) 321 } 322 323 // OnQcConstructedFromVotes implements hotstuff.VoteCollectorConsumer and pushes received qc into processing pipeline. 324 func (el *EventLoop) OnQcConstructedFromVotes(qc *flow.QuorumCertificate) { 325 el.onTrustedQC(qc) 326 } 327 328 // OnTimeoutProcessed implements hotstuff.TimeoutCollectorConsumer and is no-op 329 func (el *EventLoop) OnTimeoutProcessed(timeout *model.TimeoutObject) {} 330 331 // OnVoteProcessed implements hotstuff.VoteCollectorConsumer and is no-op 332 func (el *EventLoop) OnVoteProcessed(vote *model.Vote) {}