github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/consensus/sealing/engine.go (about) 1 package sealing 2 3 import ( 4 "fmt" 5 6 "github.com/gammazero/workerpool" 7 "github.com/rs/zerolog" 8 9 "github.com/onflow/flow-go/consensus/hotstuff/model" 10 "github.com/onflow/flow-go/engine" 11 "github.com/onflow/flow-go/engine/common/fifoqueue" 12 "github.com/onflow/flow-go/engine/consensus" 13 "github.com/onflow/flow-go/model/flow" 14 "github.com/onflow/flow-go/model/messages" 15 "github.com/onflow/flow-go/module" 16 "github.com/onflow/flow-go/module/mempool" 17 "github.com/onflow/flow-go/module/metrics" 18 msig "github.com/onflow/flow-go/module/signature" 19 "github.com/onflow/flow-go/network" 20 "github.com/onflow/flow-go/network/channels" 21 "github.com/onflow/flow-go/state/protocol" 22 "github.com/onflow/flow-go/storage" 23 ) 24 25 type Event struct { 26 OriginID flow.Identifier 27 Msg interface{} 28 } 29 30 // defaultApprovalQueueCapacity maximum capacity of approvals queue 31 const defaultApprovalQueueCapacity = 10000 32 33 // defaultApprovalResponseQueueCapacity maximum capacity of approval requests queue 34 const defaultApprovalResponseQueueCapacity = 10000 35 36 // defaultSealingEngineWorkers number of workers to dispatch events for sealing core 37 const defaultSealingEngineWorkers = 8 38 39 // defaultAssignmentCollectorsWorkerPoolCapacity is the default number of workers that is available for worker pool which is used 40 // by assignment collector state machine to do transitions 41 const defaultAssignmentCollectorsWorkerPoolCapacity = 4 42 43 // defaultIncorporatedBlockQueueCapacity maximum capacity for queuing incorporated blocks 44 // Caution: We cannot drop incorporated blocks, as there is no way that results included in the block 45 // can be re-added later once dropped. Missing any incorporated result can undermine sealing liveness! 46 // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node 47 // in case queueing an incorporated block fails. 48 const defaultIncorporatedBlockQueueCapacity = 10000 49 50 // defaultIncorporatedResultQueueCapacity maximum capacity for queuing incorporated results 51 // Caution: We cannot drop incorporated results, as there is no way that an incorporated result 52 // can be re-added later once dropped. Missing incorporated results can undermine sealing liveness! 53 // Therefore, the queue capacity should be large _and_ there should be logic for crashing the node 54 // in case queueing an incorporated result fails. 55 const defaultIncorporatedResultQueueCapacity = 80000 56 57 type ( 58 EventSink chan *Event // Channel to push pending events 59 ) 60 61 // Engine is a wrapper for approval processing `Core` which implements logic for 62 // queuing and filtering network messages which later will be processed by sealing engine. 63 // Purpose of this struct is to provide an efficient way how to consume messages from network layer and pass 64 // them to `Core`. Engine runs 2 separate gorourtines that perform pre-processing and consuming messages by Core. 65 type Engine struct { 66 unit *engine.Unit 67 workerPool *workerpool.WorkerPool 68 core consensus.SealingCore 69 log zerolog.Logger 70 me module.Local 71 headers storage.Headers 72 results storage.ExecutionResults 73 index storage.Index 74 state protocol.State 75 cacheMetrics module.MempoolMetrics 76 engineMetrics module.EngineMetrics 77 pendingApprovals engine.MessageStore 78 pendingRequestedApprovals engine.MessageStore 79 pendingIncorporatedResults *fifoqueue.FifoQueue 80 pendingIncorporatedBlocks *fifoqueue.FifoQueue 81 inboundEventsNotifier engine.Notifier 82 finalizationEventsNotifier engine.Notifier 83 blockIncorporatedNotifier engine.Notifier 84 messageHandler *engine.MessageHandler 85 rootHeader *flow.Header 86 } 87 88 // NewEngine constructs new `Engine` which runs on it's own unit. 89 func NewEngine(log zerolog.Logger, 90 tracer module.Tracer, 91 conMetrics module.ConsensusMetrics, 92 engineMetrics module.EngineMetrics, 93 mempool module.MempoolMetrics, 94 sealingTracker consensus.SealingTracker, 95 net network.EngineRegistry, 96 me module.Local, 97 headers storage.Headers, 98 payloads storage.Payloads, 99 results storage.ExecutionResults, 100 index storage.Index, 101 state protocol.State, 102 sealsDB storage.Seals, 103 assigner module.ChunkAssigner, 104 sealsMempool mempool.IncorporatedResultSeals, 105 requiredApprovalsForSealConstructionGetter module.SealingConfigsGetter, 106 ) (*Engine, error) { 107 rootHeader := state.Params().FinalizedRoot() 108 109 unit := engine.NewUnit() 110 e := &Engine{ 111 unit: unit, 112 workerPool: workerpool.New(defaultAssignmentCollectorsWorkerPoolCapacity), 113 log: log.With().Str("engine", "sealing.Engine").Logger(), 114 me: me, 115 state: state, 116 engineMetrics: engineMetrics, 117 cacheMetrics: mempool, 118 headers: headers, 119 results: results, 120 index: index, 121 rootHeader: rootHeader, 122 } 123 124 err := e.setupTrustedInboundQueues() 125 if err != nil { 126 return nil, fmt.Errorf("initialization of inbound queues for trusted inputs failed: %w", err) 127 } 128 129 err = e.setupMessageHandler(requiredApprovalsForSealConstructionGetter) 130 if err != nil { 131 return nil, fmt.Errorf("could not initialize message handler for untrusted inputs: %w", err) 132 } 133 134 // register engine with the approval provider 135 _, err = net.Register(channels.ReceiveApprovals, e) 136 if err != nil { 137 return nil, fmt.Errorf("could not register for approvals: %w", err) 138 } 139 140 // register engine to the channel for requesting missing approvals 141 approvalConduit, err := net.Register(channels.RequestApprovalsByChunk, e) 142 if err != nil { 143 return nil, fmt.Errorf("could not register for requesting approvals: %w", err) 144 } 145 146 signatureHasher := msig.NewBLSHasher(msig.ResultApprovalTag) 147 core, err := NewCore(log, e.workerPool, tracer, conMetrics, sealingTracker, unit, headers, state, sealsDB, assigner, signatureHasher, sealsMempool, approvalConduit, requiredApprovalsForSealConstructionGetter) 148 if err != nil { 149 return nil, fmt.Errorf("failed to init sealing engine: %w", err) 150 } 151 152 err = core.RepopulateAssignmentCollectorTree(payloads) 153 if err != nil { 154 return nil, fmt.Errorf("could not repopulate assignment collectors tree: %w", err) 155 } 156 e.core = core 157 158 return e, nil 159 } 160 161 // setupTrustedInboundQueues initializes inbound queues for TRUSTED INPUTS (from other components within the 162 // consensus node). We deliberately separate the queues for trusted inputs from the MessageHandler, which 163 // handles external, untrusted inputs. This reduces the attack surface, as it makes it impossible for an external 164 // attacker to feed values into the inbound channels for trusted inputs, even in the presence of bugs in 165 // the networking layer or message handler 166 func (e *Engine) setupTrustedInboundQueues() error { 167 e.finalizationEventsNotifier = engine.NewNotifier() 168 e.blockIncorporatedNotifier = engine.NewNotifier() 169 var err error 170 e.pendingIncorporatedResults, err = fifoqueue.NewFifoQueue(defaultIncorporatedResultQueueCapacity) 171 if err != nil { 172 return fmt.Errorf("failed to create queue for incorporated results: %w", err) 173 } 174 e.pendingIncorporatedBlocks, err = fifoqueue.NewFifoQueue(defaultIncorporatedBlockQueueCapacity) 175 if err != nil { 176 return fmt.Errorf("failed to create queue for incorporated blocks: %w", err) 177 } 178 return nil 179 } 180 181 // setupMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED INPUTS. 182 func (e *Engine) setupMessageHandler(getSealingConfigs module.SealingConfigsGetter) error { 183 // FIFO queue for broadcasted approvals 184 pendingApprovalsQueue, err := fifoqueue.NewFifoQueue( 185 defaultApprovalQueueCapacity, 186 fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalQueue, uint(len)) }), 187 ) 188 if err != nil { 189 return fmt.Errorf("failed to create queue for inbound approvals: %w", err) 190 } 191 e.pendingApprovals = &engine.FifoMessageStore{ 192 FifoQueue: pendingApprovalsQueue, 193 } 194 195 // FiFo queue for requested approvals 196 pendingRequestedApprovalsQueue, err := fifoqueue.NewFifoQueue( 197 defaultApprovalResponseQueueCapacity, 198 fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalResponseQueue, uint(len)) }), 199 ) 200 if err != nil { 201 return fmt.Errorf("failed to create queue for requested approvals: %w", err) 202 } 203 e.pendingRequestedApprovals = &engine.FifoMessageStore{ 204 FifoQueue: pendingRequestedApprovalsQueue, 205 } 206 207 e.inboundEventsNotifier = engine.NewNotifier() 208 // define message queueing behaviour 209 e.messageHandler = engine.NewMessageHandler( 210 e.log, 211 e.inboundEventsNotifier, 212 engine.Pattern{ 213 Match: func(msg *engine.Message) bool { 214 _, ok := msg.Payload.(*flow.ResultApproval) 215 if ok { 216 e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval) 217 } 218 return ok 219 }, 220 Map: func(msg *engine.Message) (*engine.Message, bool) { 221 if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 { 222 // if we don't require approvals to construct a seal, don't even process approvals. 223 return nil, false 224 } 225 226 return msg, true 227 }, 228 Store: e.pendingApprovals, 229 }, 230 engine.Pattern{ 231 Match: func(msg *engine.Message) bool { 232 _, ok := msg.Payload.(*messages.ApprovalResponse) 233 if ok { 234 e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval) 235 } 236 return ok 237 }, 238 Map: func(msg *engine.Message) (*engine.Message, bool) { 239 if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 { 240 // if we don't require approvals to construct a seal, don't even process approvals. 241 return nil, false 242 } 243 244 approval := msg.Payload.(*messages.ApprovalResponse).Approval 245 return &engine.Message{ 246 OriginID: msg.OriginID, 247 Payload: &approval, 248 }, true 249 }, 250 Store: e.pendingRequestedApprovals, 251 }, 252 ) 253 254 return nil 255 } 256 257 // Process sends event into channel with pending events. Generally speaking shouldn't lock for too long. 258 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 259 err := e.messageHandler.Process(originID, event) 260 if err != nil { 261 if engine.IsIncompatibleInputTypeError(err) { 262 e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel) 263 return nil 264 } 265 return fmt.Errorf("unexpected error while processing engine message: %w", err) 266 } 267 return nil 268 } 269 270 // processAvailableMessages is processor of pending events which drives events from networking layer to business logic in `Core`. 271 // Effectively consumes messages from networking layer and dispatches them into corresponding sinks which are connected with `Core`. 272 func (e *Engine) processAvailableMessages() error { 273 for { 274 select { 275 case <-e.unit.Quit(): 276 return nil 277 default: 278 } 279 280 event, ok := e.pendingIncorporatedResults.Pop() 281 if ok { 282 e.log.Debug().Msg("got new incorporated result") 283 284 err := e.processIncorporatedResult(event.(*flow.IncorporatedResult)) 285 if err != nil { 286 return fmt.Errorf("could not process incorporated result: %w", err) 287 } 288 continue 289 } 290 291 // TODO prioritization 292 // eg: msg := engine.SelectNextMessage() 293 msg, ok := e.pendingRequestedApprovals.Get() 294 if !ok { 295 msg, ok = e.pendingApprovals.Get() 296 } 297 if ok { 298 e.log.Debug().Msg("got new result approval") 299 300 err := e.onApproval(msg.OriginID, msg.Payload.(*flow.ResultApproval)) 301 if err != nil { 302 return fmt.Errorf("could not process result approval: %w", err) 303 } 304 continue 305 } 306 307 // when there is no more messages in the queue, back to the loop to wait 308 // for the next incoming message to arrive. 309 return nil 310 } 311 } 312 313 // finalizationProcessingLoop is a separate goroutine that performs processing of finalization events 314 func (e *Engine) finalizationProcessingLoop() { 315 finalizationNotifier := e.finalizationEventsNotifier.Channel() 316 for { 317 select { 318 case <-e.unit.Quit(): 319 return 320 case <-finalizationNotifier: 321 finalized, err := e.state.Final().Head() 322 if err != nil { 323 e.log.Fatal().Err(err).Msg("could not retrieve last finalized block") 324 } 325 err = e.core.ProcessFinalizedBlock(finalized.ID()) 326 if err != nil { 327 e.log.Fatal().Err(err).Msgf("could not process finalized block %v", finalized.ID()) 328 } 329 } 330 } 331 } 332 333 // blockIncorporatedEventsProcessingLoop is a separate goroutine for processing block incorporated events 334 func (e *Engine) blockIncorporatedEventsProcessingLoop() { 335 c := e.blockIncorporatedNotifier.Channel() 336 337 for { 338 select { 339 case <-e.unit.Quit(): 340 return 341 case <-c: 342 err := e.processBlockIncorporatedEvents() 343 if err != nil { 344 e.log.Fatal().Err(err).Msg("internal error processing block incorporated queued message") 345 } 346 } 347 } 348 } 349 350 func (e *Engine) loop() { 351 notifier := e.inboundEventsNotifier.Channel() 352 for { 353 select { 354 case <-e.unit.Quit(): 355 return 356 case <-notifier: 357 err := e.processAvailableMessages() 358 if err != nil { 359 e.log.Fatal().Err(err).Msg("internal error processing queued message") 360 } 361 } 362 } 363 } 364 365 // processIncorporatedResult is a function that creates incorporated result and submits it for processing 366 // to sealing core. In phase 2, incorporated result is incorporated at same block that is being executed. 367 // This will be changed in phase 3. 368 func (e *Engine) processIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error { 369 err := e.core.ProcessIncorporatedResult(incorporatedResult) 370 e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageExecutionReceipt) 371 return err 372 } 373 374 func (e *Engine) onApproval(originID flow.Identifier, approval *flow.ResultApproval) error { 375 // don't process approval if originID is mismatched 376 if originID != approval.Body.ApproverID { 377 return nil 378 } 379 380 err := e.core.ProcessApproval(approval) 381 e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageResultApproval) 382 if err != nil { 383 return fmt.Errorf("fatal internal error in sealing core logic") 384 } 385 return nil 386 } 387 388 // SubmitLocal submits an event originating on the local node. 389 func (e *Engine) SubmitLocal(event interface{}) { 390 err := e.ProcessLocal(event) 391 if err != nil { 392 // receiving an input of incompatible type from a trusted internal component is fatal 393 e.log.Fatal().Err(err).Msg("internal error processing event") 394 } 395 } 396 397 // Submit submits the given event from the node with the given origin ID 398 // for processing in a non-blocking manner. It returns instantly and logs 399 // a potential processing error internally when done. 400 func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) { 401 err := e.Process(channel, originID, event) 402 if err != nil { 403 e.log.Fatal().Err(err).Msg("internal error processing event") 404 } 405 } 406 407 // ProcessLocal processes an event originating on the local node. 408 func (e *Engine) ProcessLocal(event interface{}) error { 409 return e.messageHandler.Process(e.me.NodeID(), event) 410 } 411 412 // Ready returns a ready channel that is closed once the engine has fully 413 // started. For the propagation engine, we consider the engine up and running 414 // upon initialization. 415 func (e *Engine) Ready() <-chan struct{} { 416 // launch as many workers as we need 417 for i := 0; i < defaultSealingEngineWorkers; i++ { 418 e.unit.Launch(e.loop) 419 } 420 e.unit.Launch(e.finalizationProcessingLoop) 421 e.unit.Launch(e.blockIncorporatedEventsProcessingLoop) 422 return e.unit.Ready() 423 } 424 425 func (e *Engine) Done() <-chan struct{} { 426 return e.unit.Done(func() { 427 e.workerPool.StopWait() 428 }) 429 } 430 431 // OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer` 432 // It informs sealing.Core about finalization of respective block. 433 // 434 // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages 435 // from external nodes cannot be considered as inputs to this function 436 func (e *Engine) OnFinalizedBlock(*model.Block) { 437 e.finalizationEventsNotifier.Notify() 438 } 439 440 // OnBlockIncorporated implements `OnBlockIncorporated` from the `hotstuff.FinalizationConsumer` 441 // It processes all execution results that were incorporated in parent block payload. 442 // 443 // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages 444 // from external nodes cannot be considered as inputs to this function 445 func (e *Engine) OnBlockIncorporated(incorporatedBlock *model.Block) { 446 added := e.pendingIncorporatedBlocks.Push(incorporatedBlock.BlockID) 447 if !added { 448 // Not being able to queue an incorporated block is a fatal edge case. It might happen, if the 449 // queue capacity is depleted. However, we cannot drop incorporated blocks, because there 450 // is no way that any contained incorporated result would be re-added later once dropped. 451 e.log.Fatal().Msgf("failed to queue incorporated block %v", incorporatedBlock.BlockID) 452 } 453 e.blockIncorporatedNotifier.Notify() 454 } 455 456 // processIncorporatedBlock selects receipts that were included into incorporated block and submits them 457 // for further processing to sealing core. No errors expected during normal operations. 458 func (e *Engine) processIncorporatedBlock(incorporatedBlockID flow.Identifier) error { 459 // In order to process a block within the sealing engine, we need the block's source of 460 // randomness (to compute the chunk assignment). The source of randomness can be taken from _any_ 461 // QC for the block. We know that we have such a QC, once a valid child block is incorporated. 462 // Vice-versa, once a block is incorporated, we know that _its parent_ has a valid child, i.e. 463 // the parent's source of randomness is now know. 464 465 incorporatedBlock, err := e.headers.ByBlockID(incorporatedBlockID) 466 if err != nil { 467 return fmt.Errorf("could not retrieve header for block %v", incorporatedBlockID) 468 } 469 470 e.log.Info().Msgf("processing incorporated block %v at height %d", incorporatedBlockID, incorporatedBlock.Height) 471 472 // we are interested in blocks with height strictly larger than root block 473 if incorporatedBlock.Height <= e.rootHeader.Height { 474 return nil 475 } 476 477 index, err := e.index.ByBlockID(incorporatedBlock.ParentID) 478 if err != nil { 479 return fmt.Errorf("could not retrieve payload index for block %v", incorporatedBlock.ParentID) 480 } 481 482 for _, resultID := range index.ResultIDs { 483 result, err := e.results.ByID(resultID) 484 if err != nil { 485 return fmt.Errorf("could not retrieve receipt incorporated in block %v: %w", incorporatedBlock.ParentID, err) 486 } 487 488 incorporatedResult := flow.NewIncorporatedResult(incorporatedBlock.ParentID, result) 489 added := e.pendingIncorporatedResults.Push(incorporatedResult) 490 if !added { 491 // Not being able to queue an incorporated result is a fatal edge case. It might happen, if the 492 // queue capacity is depleted. However, we cannot drop incorporated results, because there 493 // is no way that an incorporated result can be re-added later once dropped. 494 return fmt.Errorf("failed to queue incorporated result") 495 } 496 } 497 e.inboundEventsNotifier.Notify() 498 return nil 499 } 500 501 // processBlockIncorporatedEvents performs processing of block incorporated hot stuff events 502 // No errors expected during normal operations. 503 func (e *Engine) processBlockIncorporatedEvents() error { 504 for { 505 select { 506 case <-e.unit.Quit(): 507 return nil 508 default: 509 } 510 511 msg, ok := e.pendingIncorporatedBlocks.Pop() 512 if ok { 513 err := e.processIncorporatedBlock(msg.(flow.Identifier)) 514 if err != nil { 515 return fmt.Errorf("could not process incorporated block: %w", err) 516 } 517 continue 518 } 519 520 // when there is no more messages in the queue, back to the loop to wait 521 // for the next incoming message to arrive. 522 return nil 523 } 524 }