github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/execution/provider/engine.go (about) 1 package provider 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/rs/zerolog" 10 11 "github.com/onflow/flow-go/module/component" 12 "github.com/onflow/flow-go/module/irrecoverable" 13 "github.com/onflow/flow-go/module/mempool" 14 15 "github.com/onflow/flow-go/engine" 16 "github.com/onflow/flow-go/engine/execution/state" 17 "github.com/onflow/flow-go/model/flow" 18 "github.com/onflow/flow-go/model/flow/filter" 19 "github.com/onflow/flow-go/model/messages" 20 "github.com/onflow/flow-go/module" 21 "github.com/onflow/flow-go/module/trace" 22 "github.com/onflow/flow-go/network" 23 "github.com/onflow/flow-go/network/channels" 24 "github.com/onflow/flow-go/state/protocol" 25 "github.com/onflow/flow-go/storage" 26 "github.com/onflow/flow-go/utils/logging" 27 "github.com/onflow/flow-go/utils/rand" 28 ) 29 30 type ProviderEngine interface { 31 network.MessageProcessor 32 module.ReadyDoneAware 33 // BroadcastExecutionReceipt broadcasts an execution receipt to all nodes in the network. 34 // It skips broadcasting the receipt if the block is sealed, or the node is not authorized at the block. 35 // It returns true if the receipt is broadcasted, false otherwise. 36 BroadcastExecutionReceipt(context.Context, uint64, *flow.ExecutionReceipt) (bool, error) 37 } 38 39 type NoopEngine struct { 40 module.NoopReadyDoneAware 41 } 42 43 func (*NoopEngine) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error { 44 return nil 45 } 46 47 func (*NoopEngine) BroadcastExecutionReceipt(context.Context, uint64, *flow.ExecutionReceipt) (bool, error) { 48 return false, nil 49 } 50 51 const ( 52 // DefaultChunkDataPackRequestWorker is the default number of concurrent workers processing chunk data pack requests on 53 // execution nodes. 54 DefaultChunkDataPackRequestWorker = 100 55 // DefaultChunkDataPackQueryTimeout is the default timeout value for querying a chunk data pack from storage. 56 DefaultChunkDataPackQueryTimeout = 10 * time.Second 57 // DefaultChunkDataPackDeliveryTimeout is the default timeout value for delivery of a chunk data pack to a verification 58 // node. 59 DefaultChunkDataPackDeliveryTimeout = 10 * time.Second 60 ) 61 62 var _ ProviderEngine = (*Engine)(nil) 63 64 // An Engine provides means of accessing data about execution state and broadcasts execution receipts to nodes in the network. 65 // Also generates and saves execution receipts 66 type Engine struct { 67 component.Component 68 cm *component.ComponentManager 69 70 log zerolog.Logger 71 tracer module.Tracer 72 receiptCon network.Conduit 73 state protocol.State 74 execState state.ReadOnlyExecutionState 75 chunksConduit network.Conduit 76 metrics module.ExecutionMetrics 77 checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error) 78 chdpRequestHandler *engine.MessageHandler 79 chdpRequestQueue engine.MessageStore 80 81 // buffered channel for ChunkDataRequest workers to pick 82 // requests and process. 83 chdpRequestChannel chan *mempool.ChunkDataPackRequest 84 85 // timeout for delivery of a chunk data pack response in the network. 86 chunkDataPackDeliveryTimeout time.Duration 87 // timeout for querying chunk data pack through database. 88 chunkDataPackQueryTimeout time.Duration 89 } 90 91 func New( 92 logger zerolog.Logger, 93 tracer module.Tracer, 94 net network.EngineRegistry, 95 state protocol.State, 96 execState state.ReadOnlyExecutionState, 97 metrics module.ExecutionMetrics, 98 checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error), 99 chunkDataPackRequestQueue engine.MessageStore, 100 chdpRequestWorkers uint, 101 chunkDataPackQueryTimeout time.Duration, 102 chunkDataPackDeliveryTimeout time.Duration, 103 ) (*Engine, error) { 104 105 log := logger.With().Str("engine", "receipts").Logger() 106 107 handler := engine.NewMessageHandler( 108 log, 109 engine.NewNotifier(), 110 engine.Pattern{ 111 // Match is called on every new message coming to this engine. 112 // Provider enigne only expects ChunkDataRequests. 113 // Other message types are discarded by Match. 114 Match: func(message *engine.Message) bool { 115 chdpReq, ok := message.Payload.(*messages.ChunkDataRequest) 116 if ok { 117 log.Info(). 118 Hex("chunk_id", logging.ID(chdpReq.ChunkID)). 119 Hex("requester_id", logging.ID(message.OriginID)). 120 Msg("chunk data pack request received") 121 } 122 return ok 123 }, 124 // Map is called on messages that are Match(ed) successfully, i.e., 125 // ChunkDataRequests. 126 // It replaces the payload of message with requested chunk id. 127 Map: func(message *engine.Message) (*engine.Message, bool) { 128 chdpReq := message.Payload.(*messages.ChunkDataRequest) 129 return &engine.Message{ 130 OriginID: message.OriginID, 131 Payload: chdpReq.ChunkID, 132 }, true 133 }, 134 Store: chunkDataPackRequestQueue, 135 }) 136 137 engine := Engine{ 138 log: log, 139 tracer: tracer, 140 state: state, 141 execState: execState, 142 metrics: metrics, 143 checkAuthorizedAtBlock: checkAuthorizedAtBlock, 144 chdpRequestHandler: handler, 145 chdpRequestQueue: chunkDataPackRequestQueue, 146 chdpRequestChannel: make(chan *mempool.ChunkDataPackRequest, chdpRequestWorkers), 147 chunkDataPackDeliveryTimeout: chunkDataPackDeliveryTimeout, 148 chunkDataPackQueryTimeout: chunkDataPackQueryTimeout, 149 } 150 151 var err error 152 153 engine.receiptCon, err = net.Register(channels.PushReceipts, &engine) 154 if err != nil { 155 return nil, fmt.Errorf("could not register receipt provider engine: %w", err) 156 } 157 158 chunksConduit, err := net.Register(channels.ProvideChunks, &engine) 159 if err != nil { 160 return nil, fmt.Errorf("could not register chunk data pack provider engine: %w", err) 161 } 162 engine.chunksConduit = chunksConduit 163 164 cm := component.NewComponentManagerBuilder() 165 cm.AddWorker(engine.processQueuedChunkDataPackRequestsShovelerWorker) 166 for i := uint(0); i < chdpRequestWorkers; i++ { 167 cm.AddWorker(engine.processChunkDataPackRequestWorker) 168 } 169 170 engine.cm = cm.Build() 171 engine.Component = engine.cm 172 173 return &engine, nil 174 } 175 176 // processQueuedChunkDataPackRequestsShovelerWorker is constantly listening on the MessageHandler for ChunkDataRequests, 177 // and pushes new ChunkDataRequests into the request channel to be picked by workers. 178 func (e *Engine) processQueuedChunkDataPackRequestsShovelerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 179 ready() 180 181 e.log.Debug().Msg("process chunk data pack request shovller worker started") 182 183 for { 184 select { 185 case <-e.chdpRequestHandler.GetNotifier(): 186 // there is at list a single chunk data pack request queued up. 187 e.processAvailableMesssages(ctx) 188 case <-ctx.Done(): 189 // close the internal channel, the workers will drain the channel before exiting 190 close(e.chdpRequestChannel) 191 e.log.Trace().Msg("processing chunk data pack request worker terminated") 192 return 193 } 194 } 195 } 196 197 // processAvailableMesssages is a blocking method that reads all queued ChunkDataRequests till the queue gets empty. 198 // Each ChunkDataRequest is processed by a single concurrent worker. However, there are limited number of such workers. 199 // If there is no worker available for a request, the method blocks till one is available. 200 func (e *Engine) processAvailableMesssages(ctx irrecoverable.SignalerContext) { 201 for { 202 select { 203 case <-ctx.Done(): 204 return 205 default: 206 } 207 208 msg, ok := e.chdpRequestQueue.Get() 209 if !ok { 210 // no more requests, return 211 return 212 } 213 214 chunkId, ok := msg.Payload.(flow.Identifier) 215 if !ok { 216 // should never happen. 217 // if it does happen, it means there is a bug in the queue implementation. 218 ctx.Throw(fmt.Errorf("invalid chunk id type in chunk data pack request queue: %T", msg.Payload)) 219 } 220 221 request := &mempool.ChunkDataPackRequest{ 222 RequesterId: msg.OriginID, 223 ChunkId: chunkId, 224 } 225 lg := e.log.With(). 226 Hex("chunk_id", logging.ID(request.ChunkId)). 227 Hex("origin_id", logging.ID(request.RequesterId)).Logger() 228 229 lg.Trace().Msg("shovller is queuing chunk data pack request for processing") 230 e.chdpRequestChannel <- request 231 lg.Trace().Msg("shovller queued up chunk data pack request for processing") 232 } 233 } 234 235 // processChunkDataPackRequestWorker encapsulates the logic of a single (concurrent) worker that picks a 236 // ChunkDataRequest from this engine's queue and processes it. 237 func (e *Engine) processChunkDataPackRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 238 ready() 239 240 for { 241 request, ok := <-e.chdpRequestChannel 242 if !ok { 243 e.log.Trace().Msg("processing chunk data pack request worker terminated") 244 return 245 } 246 lg := e.log.With(). 247 Hex("chunk_id", logging.ID(request.ChunkId)). 248 Hex("origin_id", logging.ID(request.RequesterId)).Logger() 249 lg.Trace().Msg("worker picked up chunk data pack request for processing") 250 e.onChunkDataRequest(request) 251 lg.Trace().Msg("worker finished chunk data pack processing") 252 } 253 } 254 255 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 256 select { 257 case <-e.cm.ShutdownSignal(): 258 e.log.Warn().Msgf("received message from %x after shut down", originID) 259 return nil 260 default: 261 } 262 263 err := e.chdpRequestHandler.Process(originID, event) 264 if err != nil { 265 if engine.IsIncompatibleInputTypeError(err) { 266 e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel) 267 return nil 268 } 269 return fmt.Errorf("unexpected error while processing engine message: %w", err) 270 } 271 272 return nil 273 } 274 275 // onChunkDataRequest receives a request for a chunk data pack, 276 // and if such a chunk data pack is available in the execution state, it is sent to the requester node. 277 // TODO improve error handling https://github.com/dapperlabs/flow-go/issues/6363 278 func (e *Engine) onChunkDataRequest(request *mempool.ChunkDataPackRequest) { 279 processStartTime := time.Now() 280 281 lg := e.log.With(). 282 Hex("origin_id", logging.ID(request.RequesterId)). 283 Hex("chunk_id", logging.ID(request.ChunkId)). 284 Logger() 285 lg.Info().Msg("started processing chunk data pack request") 286 287 // TODO(ramtin): we might add a future logic to do extra checks on the origin of the request 288 // currently the networking layer checks that the requested is a valid node operator 289 // that has not been ejected. 290 291 // increases collector metric 292 e.metrics.ChunkDataPackRequestProcessed() 293 chunkDataPack, err := e.execState.ChunkDataPackByChunkID(request.ChunkId) 294 295 // we might be behind when we don't have the requested chunk. 296 // if this happen, log it and return nil 297 if errors.Is(err, storage.ErrNotFound) { 298 lg.Warn(). 299 Err(err). 300 Msg("chunk data pack not found, execution node may be behind") 301 return 302 } 303 if err != nil { 304 lg.Error(). 305 Err(err). 306 Msg("could not retrieve chunk ID from storage") 307 return 308 } 309 310 queryTime := time.Since(processStartTime) 311 lg = lg.With().Dur("query_time", queryTime).Logger() 312 if queryTime > e.chunkDataPackQueryTimeout { 313 lg.Warn(). 314 Dur("query_timout", e.chunkDataPackQueryTimeout). 315 Msg("chunk data pack query takes longer than expected timeout") 316 } 317 318 e.deliverChunkDataResponse(chunkDataPack, request.RequesterId) 319 } 320 321 // deliverChunkDataResponse delivers chunk data pack to the requester through network. 322 func (e *Engine) deliverChunkDataResponse(chunkDataPack *flow.ChunkDataPack, requesterId flow.Identifier) { 323 lg := e.log.With(). 324 Hex("origin_id", logging.ID(requesterId)). 325 Hex("chunk_id", logging.ID(chunkDataPack.ChunkID)). 326 Logger() 327 lg.Info().Msg("sending chunk data pack response") 328 329 // sends requested chunk data pack to the requester 330 deliveryStartTime := time.Now() 331 332 nonce, err := rand.Uint64() 333 if err != nil { 334 // TODO: this error should be returned by deliverChunkDataResponse 335 // it is logged for now since the only error possible is related to a failure 336 // of the system entropy generation. Such error is going to cause failures in other 337 // components where it's handled properly and will lead to crashing the module. 338 lg.Error(). 339 Err(err). 340 Msg("could not generate nonce for chunk data response") 341 return 342 } 343 344 response := &messages.ChunkDataResponse{ 345 ChunkDataPack: *chunkDataPack, 346 Nonce: nonce, 347 } 348 349 err = e.chunksConduit.Unicast(response, requesterId) 350 if err != nil { 351 lg.Warn(). 352 Err(err). 353 Msg("could not send requested chunk data pack to requester") 354 return 355 } 356 357 deliveryTime := time.Since(deliveryStartTime) 358 lg = lg.With().Dur("delivery_time", deliveryTime).Logger() 359 if deliveryTime > e.chunkDataPackDeliveryTimeout { 360 lg.Warn(). 361 Dur("delivery_timout", e.chunkDataPackDeliveryTimeout). 362 Msg("chunk data pack delivery takes longer than expected timeout") 363 } 364 365 if chunkDataPack.Collection != nil { 366 // logging collection id of non-system chunks. 367 // A system chunk has both the collection and collection id set to nil. 368 lg = lg.With(). 369 Hex("collection_id", logging.ID(chunkDataPack.Collection.ID())). 370 Logger() 371 } 372 lg.Info().Msg("chunk data pack request successfully replied") 373 } 374 375 // BroadcastExecutionReceipt broadcasts an execution receipt to all nodes in the network. 376 // It skips broadcasting the receipt if the block is sealed, or the node is not authorized at the block. 377 // It returns true if the receipt is broadcasted, false otherwise. 378 func (e *Engine) BroadcastExecutionReceipt(ctx context.Context, height uint64, receipt *flow.ExecutionReceipt) (bool, error) { 379 // if the receipt is for a sealed block, then no need to broadcast it. 380 lastSealed, err := e.state.Sealed().Head() 381 if err != nil { 382 return false, fmt.Errorf("could not get sealed block before broadcasting: %w", err) 383 } 384 385 isExecutedBlockSealed := height <= lastSealed.Height 386 387 if isExecutedBlockSealed { 388 // no need to braodcast the receipt if the block is sealed 389 return false, nil 390 } 391 392 blockID := receipt.ExecutionResult.BlockID 393 authorizedAtBlock, err := e.checkAuthorizedAtBlock(blockID) 394 if err != nil { 395 return false, fmt.Errorf("could not check staking status: %w", err) 396 } 397 398 if !authorizedAtBlock { 399 return false, nil 400 } 401 402 finalState, err := receipt.ExecutionResult.FinalStateCommitment() 403 if err != nil { 404 return false, fmt.Errorf("could not get final state: %w", err) 405 } 406 407 span, _ := e.tracer.StartSpanFromContext(ctx, trace.EXEBroadcastExecutionReceipt) 408 defer span.End() 409 410 e.log.Debug(). 411 Hex("block_id", logging.ID(receipt.ExecutionResult.BlockID)). 412 Hex("receipt_id", logging.Entity(receipt)). 413 Hex("final_state", finalState[:]). 414 Msg("broadcasting execution receipt") 415 416 identities, err := e.state.Final().Identities(filter.HasRole[flow.Identity](flow.RoleAccess, flow.RoleConsensus, 417 flow.RoleVerification)) 418 if err != nil { 419 return false, fmt.Errorf("could not get consensus and verification identities: %w", err) 420 } 421 422 err = e.receiptCon.Publish(receipt, identities.NodeIDs()...) 423 if err != nil { 424 return false, fmt.Errorf("could not submit execution receipts: %w", err) 425 } 426 427 return true, nil 428 }