github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/sealing/engine.go

package sealing

import (
	"fmt"

	"github.com/gammazero/workerpool"
	"github.com/rs/zerolog"

	"github.com/koko1123/flow-go-1/consensus/hotstuff/model"
	"github.com/koko1123/flow-go-1/engine"
	"github.com/koko1123/flow-go-1/engine/common/fifoqueue"
	"github.com/koko1123/flow-go-1/engine/consensus"
	"github.com/koko1123/flow-go-1/model/flow"
	"github.com/koko1123/flow-go-1/model/messages"
	"github.com/koko1123/flow-go-1/module"
	"github.com/koko1123/flow-go-1/module/mempool"
	"github.com/koko1123/flow-go-1/module/metrics"
	msig "github.com/koko1123/flow-go-1/module/signature"
	"github.com/koko1123/flow-go-1/network"
	"github.com/koko1123/flow-go-1/network/channels"
	"github.com/koko1123/flow-go-1/state/protocol"
	"github.com/koko1123/flow-go-1/storage"
)

type Event struct {
	OriginID flow.Identifier
	Msg      interface{}
}

// defaultApprovalQueueCapacity is the maximum capacity of the approvals queue.
const defaultApprovalQueueCapacity = 10000

// defaultApprovalResponseQueueCapacity is the maximum capacity of the requested-approvals queue.
const defaultApprovalResponseQueueCapacity = 10000

// defaultSealingEngineWorkers is the number of workers that dispatch events to the sealing core.
const defaultSealingEngineWorkers = 8

// defaultAssignmentCollectorsWorkerPoolCapacity is the default number of workers available to the
// worker pool that the assignment collector state machine uses to perform state transitions.
const defaultAssignmentCollectorsWorkerPoolCapacity = 4

// defaultIncorporatedBlockQueueCapacity is the maximum capacity for queuing incorporated blocks.
// Caution: We cannot drop incorporated blocks, as there is no way that results included in the block
// can be re-added later once dropped. Missing any incorporated result can undermine sealing liveness!
// Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
// in case queueing an incorporated block fails.
const defaultIncorporatedBlockQueueCapacity = 10000

// defaultIncorporatedResultQueueCapacity is the maximum capacity for queuing incorporated results.
// Caution: We cannot drop incorporated results, as there is no way that an incorporated result
// can be re-added later once dropped. Missing incorporated results can undermine sealing liveness!
// Therefore, the queue capacity should be large _and_ there should be logic for crashing the node
// in case queueing an incorporated result fails.
const defaultIncorporatedResultQueueCapacity = 80000

type (
	EventSink chan *Event // Channel to push pending events
)

// Engine is a wrapper around the approval-processing `Core` which implements the logic for
// queuing and filtering network messages that will later be processed by the sealing engine.
// The purpose of this struct is to provide an efficient way to consume messages from the network
// layer and pass them to `Core`. The Engine runs separate goroutines that perform the
// pre-processing and the consumption of messages by `Core`.
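//
// Data flow (editorial summary of the code below): messages enter through `Process` (network)
// and `ProcessLocal` (local) and are filtered and queued by the `MessageHandler`, while trusted
// HotStuff callbacks (`OnFinalizedBlock`, `OnBlockIncorporated`) feed separate internal queues.
// Notifiers then wake the worker goroutines launched in `Ready()`, which drain the queues and
// hand the events to `Core`.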
type Engine struct {
	unit                       *engine.Unit
	workerPool                 *workerpool.WorkerPool
	core                       consensus.SealingCore
	log                        zerolog.Logger
	me                         module.Local
	headers                    storage.Headers
	results                    storage.ExecutionResults
	index                      storage.Index
	state                      protocol.State
	cacheMetrics               module.MempoolMetrics
	engineMetrics              module.EngineMetrics
	pendingApprovals           engine.MessageStore
	pendingRequestedApprovals  engine.MessageStore
	pendingIncorporatedResults *fifoqueue.FifoQueue
	pendingIncorporatedBlocks  *fifoqueue.FifoQueue
	inboundEventsNotifier      engine.Notifier
	finalizationEventsNotifier engine.Notifier
	blockIncorporatedNotifier  engine.Notifier
	messageHandler             *engine.MessageHandler
	rootHeader                 *flow.Header
}

// NewEngine constructs a new `Engine`, which runs on its own unit.
func NewEngine(log zerolog.Logger,
	tracer module.Tracer,
	conMetrics module.ConsensusMetrics,
	engineMetrics module.EngineMetrics,
	mempool module.MempoolMetrics,
	sealingTracker consensus.SealingTracker,
	net network.Network,
	me module.Local,
	headers storage.Headers,
	payloads storage.Payloads,
	results storage.ExecutionResults,
	index storage.Index,
	state protocol.State,
	sealsDB storage.Seals,
	assigner module.ChunkAssigner,
	sealsMempool mempool.IncorporatedResultSeals,
	requiredApprovalsForSealConstructionGetter module.SealingConfigsGetter,
) (*Engine, error) {
	rootHeader, err := state.Params().Root()
	if err != nil {
		return nil, fmt.Errorf("could not retrieve root block: %w", err)
	}

	unit := engine.NewUnit()
	e := &Engine{
		unit:          unit,
		workerPool:    workerpool.New(defaultAssignmentCollectorsWorkerPoolCapacity),
		log:           log.With().Str("engine", "sealing.Engine").Logger(),
		me:            me,
		state:         state,
		engineMetrics: engineMetrics,
		cacheMetrics:  mempool,
		headers:       headers,
		results:       results,
		index:         index,
		rootHeader:    rootHeader,
	}

	err = e.setupTrustedInboundQueues()
	if err != nil {
		return nil, fmt.Errorf("initialization of inbound queues for trusted inputs failed: %w", err)
	}

	err = e.setupMessageHandler(requiredApprovalsForSealConstructionGetter)
	if err != nil {
		return nil, fmt.Errorf("could not initialize message handler for untrusted inputs: %w", err)
	}

	// register engine with the approval provider
	_, err = net.Register(channels.ReceiveApprovals, e)
	if err != nil {
		return nil, fmt.Errorf("could not register for approvals: %w", err)
	}

	// register engine to the channel for requesting missing approvals
	approvalConduit, err := net.Register(channels.RequestApprovalsByChunk, e)
	if err != nil {
		return nil, fmt.Errorf("could not register for requesting approvals: %w", err)
	}

	signatureHasher := msig.NewBLSHasher(msig.ResultApprovalTag)
	core, err := NewCore(log, e.workerPool, tracer, conMetrics, sealingTracker, unit, headers, state, sealsDB, assigner, signatureHasher, sealsMempool, approvalConduit, requiredApprovalsForSealConstructionGetter)
	if err != nil {
		return nil, fmt.Errorf("failed to init sealing engine: %w", err)
	}

	err = core.RepopulateAssignmentCollectorTree(payloads)
	if err != nil {
		return nil, fmt.Errorf("could not repopulate assignment collectors tree: %w", err)
	}
	e.core = core

	return e, nil
}
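
// Illustrative wiring sketch (editorial addition, not part of the original file): a consensus
// node would construct and start the engine roughly as follows, assuming the dependencies
// (logger, tracer, metrics, storages, protocol state, chunk assigner, seals mempool, sealing
// config getter, ...) have already been initialized elsewhere:
//
//	sealingEngine, err := NewEngine(log, tracer, conMetrics, engineMetrics, mempoolMetrics,
//		sealingTracker, net, me, headers, payloads, results, index, state, sealsDB,
//		assigner, sealsMempool, sealingConfigs)
//	if err != nil {
//		return fmt.Errorf("could not initialize sealing engine: %w", err)
//	}
//	<-sealingEngine.Ready() // launches the worker goroutines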

// setupTrustedInboundQueues initializes inbound queues for TRUSTED INPUTS (from other components within the
// consensus node). We deliberately separate the queues for trusted inputs from the MessageHandler, which
// handles external, untrusted inputs. This reduces the attack surface, as it makes it impossible for an external
// attacker to feed values into the inbound channels for trusted inputs, even in the presence of bugs in
// the networking layer or message handler.
func (e *Engine) setupTrustedInboundQueues() error {
	e.finalizationEventsNotifier = engine.NewNotifier()
	e.blockIncorporatedNotifier = engine.NewNotifier()
	var err error
	e.pendingIncorporatedResults, err = fifoqueue.NewFifoQueue(defaultIncorporatedResultQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for incorporated results: %w", err)
	}
	e.pendingIncorporatedBlocks, err = fifoqueue.NewFifoQueue(defaultIncorporatedBlockQueueCapacity)
	if err != nil {
		return fmt.Errorf("failed to create queue for incorporated blocks: %w", err)
	}
	return nil
}

// setupMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED INPUTS.
func (e *Engine) setupMessageHandler(getSealingConfigs module.SealingConfigsGetter) error {
	// FIFO queue for broadcasted approvals
	pendingApprovalsQueue, err := fifoqueue.NewFifoQueue(
		defaultApprovalQueueCapacity,
		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalQueue, uint(len)) }),
	)
	if err != nil {
		return fmt.Errorf("failed to create queue for inbound approvals: %w", err)
	}
	e.pendingApprovals = &engine.FifoMessageStore{
		FifoQueue: pendingApprovalsQueue,
	}

	// FIFO queue for requested approvals
	pendingRequestedApprovalsQueue, err := fifoqueue.NewFifoQueue(
		defaultApprovalResponseQueueCapacity,
		fifoqueue.WithLengthObserver(func(len int) { e.cacheMetrics.MempoolEntries(metrics.ResourceApprovalResponseQueue, uint(len)) }),
	)
	if err != nil {
		return fmt.Errorf("failed to create queue for requested approvals: %w", err)
	}
	e.pendingRequestedApprovals = &engine.FifoMessageStore{
		FifoQueue: pendingRequestedApprovalsQueue,
	}

	e.inboundEventsNotifier = engine.NewNotifier()
	// define message queueing behaviour
	e.messageHandler = engine.NewMessageHandler(
		e.log,
		e.inboundEventsNotifier,
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*flow.ResultApproval)
				if ok {
					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
				}
				return ok
			},
			Map: func(msg *engine.Message) (*engine.Message, bool) {
				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
					// if we don't require approvals to construct a seal, don't even process approvals.
					return nil, false
				}

				return msg, true
			},
			Store: e.pendingApprovals,
		},
		engine.Pattern{
			Match: func(msg *engine.Message) bool {
				_, ok := msg.Payload.(*messages.ApprovalResponse)
				if ok {
					e.engineMetrics.MessageReceived(metrics.EngineSealing, metrics.MessageResultApproval)
				}
				return ok
			},
			Map: func(msg *engine.Message) (*engine.Message, bool) {
				if getSealingConfigs.RequireApprovalsForSealConstructionDynamicValue() < 1 {
					// if we don't require approvals to construct a seal, don't even process approvals.
					return nil, false
				}

				approval := msg.Payload.(*messages.ApprovalResponse).Approval
				return &engine.Message{
					OriginID: msg.OriginID,
					Payload:  &approval,
				}, true
			},
			Store: e.pendingRequestedApprovals,
		},
	)

	return nil
}
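
// Note (editorial summary of the patterns above): an inbound message is matched against the
// patterns in order. `Match` filters by payload type and records a received-message metric,
// `Map` drops approvals entirely when no approvals are required for seal construction and
// unwraps a `messages.ApprovalResponse` into a plain `*flow.ResultApproval`, and `Store`
// enqueues the mapped message. After a successful enqueue the handler notifies
// `inboundEventsNotifier`, which wakes one of the worker loops; messages matching no pattern
// are rejected with an incompatible-input-type error (see `Process` below).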

// Process queues the given event into the channel of pending events. Generally speaking, it shouldn't block for long.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
	err := e.messageHandler.Process(originID, event)
	if err != nil {
		if engine.IsIncompatibleInputTypeError(err) {
			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
			return nil
		}
		return fmt.Errorf("unexpected error while processing engine message: %w", err)
	}
	return nil
}

// processAvailableMessages processes pending events, driving them from the networking layer to the business logic in `Core`.
// It effectively consumes messages from the networking layer and dispatches them into the corresponding sinks connected with `Core`.
func (e *Engine) processAvailableMessages() error {
	for {
		select {
		case <-e.unit.Quit():
			return nil
		default:
		}

		event, ok := e.pendingIncorporatedResults.Pop()
		if ok {
			e.log.Debug().Msg("got new incorporated result")

			err := e.processIncorporatedResult(event.(*flow.IncorporatedResult))
			if err != nil {
				return fmt.Errorf("could not process incorporated result: %w", err)
			}
			continue
		}

		// TODO prioritization
		// eg: msg := engine.SelectNextMessage()
		msg, ok := e.pendingRequestedApprovals.Get()
		if !ok {
			msg, ok = e.pendingApprovals.Get()
		}
		if ok {
			e.log.Debug().Msg("got new result approval")

			err := e.onApproval(msg.OriginID, msg.Payload.(*flow.ResultApproval))
			if err != nil {
				return fmt.Errorf("could not process result approval: %w", err)
			}
			continue
		}

		// when there are no more messages in the queue, return to the outer loop to wait
		// for the next incoming message to arrive.
		return nil
	}
}

// finalizationProcessingLoop is a separate goroutine that performs processing of finalization events.
func (e *Engine) finalizationProcessingLoop() {
	finalizationNotifier := e.finalizationEventsNotifier.Channel()
	for {
		select {
		case <-e.unit.Quit():
			return
		case <-finalizationNotifier:
			finalized, err := e.state.Final().Head()
			if err != nil {
				e.log.Fatal().Err(err).Msg("could not retrieve last finalized block")
			}
			err = e.core.ProcessFinalizedBlock(finalized.ID())
			if err != nil {
				e.log.Fatal().Err(err).Msgf("could not process finalized block %v", finalized.ID())
			}
		}
	}
}

// blockIncorporatedEventsProcessingLoop is a separate goroutine for processing block-incorporated events.
func (e *Engine) blockIncorporatedEventsProcessingLoop() {
	c := e.blockIncorporatedNotifier.Channel()

	for {
		select {
		case <-e.unit.Quit():
			return
		case <-c:
			err := e.processBlockIncorporatedEvents()
			if err != nil {
				e.log.Fatal().Err(err).Msg("internal error processing block incorporated queued message")
			}
		}
	}
}

// loop is the main worker routine: it waits for the inbound-events notifier and drains the
// pending queues via processAvailableMessages.
func (e *Engine) loop() {
	notifier := e.inboundEventsNotifier.Channel()
	for {
		select {
		case <-e.unit.Quit():
			return
		case <-notifier:
			err := e.processAvailableMessages()
			if err != nil {
				e.log.Fatal().Err(err).Msg("internal error processing queued message")
			}
		}
	}
}

// processIncorporatedResult submits an incorporated result for processing to the sealing core.
// In phase 2, a result is incorporated at the same block that is being executed.
// This will be changed in phase 3.
func (e *Engine) processIncorporatedResult(incorporatedResult *flow.IncorporatedResult) error {
	err := e.core.ProcessIncorporatedResult(incorporatedResult)
	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageExecutionReceipt)
	return err
}

// onApproval checks that the approval was sent by its claimed approver and forwards it to the sealing core.
func (e *Engine) onApproval(originID flow.Identifier, approval *flow.ResultApproval) error {
	// don't process approval if originID is mismatched
	if originID != approval.Body.ApproverID {
		return nil
	}

	err := e.core.ProcessApproval(approval)
	e.engineMetrics.MessageHandled(metrics.EngineSealing, metrics.MessageResultApproval)
	if err != nil {
		return fmt.Errorf("fatal internal error in sealing core logic")
	}
	return nil
}

// SubmitLocal submits an event originating on the local node.
func (e *Engine) SubmitLocal(event interface{}) {
	err := e.ProcessLocal(event)
	if err != nil {
		// receiving an input of incompatible type from a trusted internal component is fatal
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// Submit submits the given event from the node with the given origin ID
// for processing in a non-blocking manner. It returns instantly and logs
// a potential processing error internally when done.
func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
	err := e.Process(channel, originID, event)
	if err != nil {
		e.log.Fatal().Err(err).Msg("internal error processing event")
	}
}

// ProcessLocal processes an event originating on the local node.
func (e *Engine) ProcessLocal(event interface{}) error {
	return e.messageHandler.Process(e.me.NodeID(), event)
}

// Ready returns a ready channel that is closed once the engine has fully
// started. For the sealing engine, we consider the engine up and running
// upon initialization.
func (e *Engine) Ready() <-chan struct{} {
	// launch as many workers as we need
	for i := 0; i < defaultSealingEngineWorkers; i++ {
		e.unit.Launch(e.loop)
	}
	e.unit.Launch(e.finalizationProcessingLoop)
	e.unit.Launch(e.blockIncorporatedEventsProcessingLoop)
	return e.unit.Ready()
}

// Done returns a done channel that is closed once the engine has shut down; shutdown also
// waits for the assignment-collector worker pool to finish its queued tasks.
func (e *Engine) Done() <-chan struct{} {
	return e.unit.Done(func() {
		e.workerPool.StopWait()
	})
}

// OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer`.
// (1) Informs sealing.Core about finalization of the respective block.
//
// CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
// from external nodes cannot be considered as inputs to this function.
func (e *Engine) OnFinalizedBlock(*model.Block) {
	e.finalizationEventsNotifier.Notify()
}

// OnBlockIncorporated implements `OnBlockIncorporated` from the `hotstuff.FinalizationConsumer`.
// (1) Processes all execution results that were incorporated in the parent block's payload.
//
// CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages
// from external nodes cannot be considered as inputs to this function.
func (e *Engine) OnBlockIncorporated(incorporatedBlock *model.Block) {
	added := e.pendingIncorporatedBlocks.Push(incorporatedBlock.BlockID)
	if !added {
		// Not being able to queue an incorporated block is a fatal edge case. It might happen if the
		// queue capacity is depleted. However, we cannot drop incorporated blocks, because there
		// is no way that any contained incorporated result would be re-added later once dropped.
		e.log.Fatal().Msgf("failed to queue incorporated block %v", incorporatedBlock.BlockID)
	}
	e.blockIncorporatedNotifier.Notify()
}

// processIncorporatedBlock selects the execution results that were incorporated in the payload of the
// given block's parent and submits them for further processing to the sealing core.
// No errors are expected during normal operations.
func (e *Engine) processIncorporatedBlock(incorporatedBlockID flow.Identifier) error {
	// In order to process a block within the sealing engine, we need the block's source of
	// randomness (to compute the chunk assignment). The source of randomness can be taken from _any_
	// QC for the block. We know that we have such a QC once a valid child block is incorporated.
	// Vice versa, once a block is incorporated, we know that _its parent_ has a valid child, i.e.
	// the parent's source of randomness is now known.
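	// For example (editorial illustration): when block C, a child of block B, is incorporated,
	// C carries a QC for B, and that QC yields B's source of randomness. The chunk assignments
	// for the execution results contained in B's payload can therefore be computed now, which is
	// why the code below reads the payload index of the incorporated block's *parent*.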

	incorporatedBlock, err := e.headers.ByBlockID(incorporatedBlockID)
	if err != nil {
		return fmt.Errorf("could not retrieve header for block %v", incorporatedBlockID)
	}

	e.log.Info().Msgf("processing incorporated block %v at height %d", incorporatedBlockID, incorporatedBlock.Height)

	// we are only interested in blocks with height strictly larger than the root block
	if incorporatedBlock.Height <= e.rootHeader.Height {
		return nil
	}

	index, err := e.index.ByBlockID(incorporatedBlock.ParentID)
	if err != nil {
		return fmt.Errorf("could not retrieve payload index for block %v", incorporatedBlock.ParentID)
	}

	for _, resultID := range index.ResultIDs {
		result, err := e.results.ByID(resultID)
		if err != nil {
			return fmt.Errorf("could not retrieve receipt incorporated in block %v: %w", incorporatedBlock.ParentID, err)
		}

		incorporatedResult := flow.NewIncorporatedResult(incorporatedBlock.ParentID, result)
		added := e.pendingIncorporatedResults.Push(incorporatedResult)
		if !added {
			// Not being able to queue an incorporated result is a fatal edge case. It might happen if the
			// queue capacity is depleted. However, we cannot drop incorporated results, because there
			// is no way that an incorporated result can be re-added later once dropped.
			return fmt.Errorf("failed to queue incorporated result")
		}
	}
	e.inboundEventsNotifier.Notify()
	return nil
}

// processBlockIncorporatedEvents processes the queued block-incorporated HotStuff events.
// No errors are expected during normal operations.
func (e *Engine) processBlockIncorporatedEvents() error {
	for {
		select {
		case <-e.unit.Quit():
			return nil
		default:
		}

		msg, ok := e.pendingIncorporatedBlocks.Pop()
		if ok {
			err := e.processIncorporatedBlock(msg.(flow.Identifier))
			if err != nil {
				return fmt.Errorf("could not process incorporated block: %w", err)
			}
			continue
		}

		// when there are no more messages in the queue, return to the outer loop to wait
		// for the next incoming message to arrive.
		return nil
	}
}