github.com/koko1123/flow-go-1@v0.29.6/engine/execution/provider/engine.go (about) 1 package provider 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math/rand" 8 "time" 9 10 "github.com/rs/zerolog" 11 12 "github.com/koko1123/flow-go-1/module/component" 13 "github.com/koko1123/flow-go-1/module/irrecoverable" 14 "github.com/koko1123/flow-go-1/module/mempool" 15 16 "github.com/koko1123/flow-go-1/engine" 17 "github.com/koko1123/flow-go-1/engine/execution/state" 18 "github.com/koko1123/flow-go-1/model/flow" 19 "github.com/koko1123/flow-go-1/model/flow/filter" 20 "github.com/koko1123/flow-go-1/model/messages" 21 "github.com/koko1123/flow-go-1/module" 22 "github.com/koko1123/flow-go-1/module/trace" 23 "github.com/koko1123/flow-go-1/network" 24 "github.com/koko1123/flow-go-1/network/channels" 25 "github.com/koko1123/flow-go-1/state/protocol" 26 "github.com/koko1123/flow-go-1/storage" 27 "github.com/koko1123/flow-go-1/utils/logging" 28 ) 29 30 type ProviderEngine interface { 31 network.MessageProcessor 32 BroadcastExecutionReceipt(context.Context, *flow.ExecutionReceipt) error 33 } 34 35 const ( 36 // DefaultChunkDataPackRequestWorker is the default number of concurrent workers processing chunk data pack requests on 37 // execution nodes. 38 DefaultChunkDataPackRequestWorker = 100 39 // DefaultChunkDataPackQueryTimeout is the default timeout value for querying a chunk data pack from storage. 40 DefaultChunkDataPackQueryTimeout = 10 * time.Second 41 // DefaultChunkDataPackDeliveryTimeout is the default timeout value for delivery of a chunk data pack to a verification 42 // node. 43 DefaultChunkDataPackDeliveryTimeout = 10 * time.Second 44 ) 45 46 // An Engine provides means of accessing data about execution state and broadcasts execution receipts to nodes in the network. 47 // Also generates and saves execution receipts 48 type Engine struct { 49 component.Component 50 cm *component.ComponentManager 51 52 log zerolog.Logger 53 tracer module.Tracer 54 receiptCon network.Conduit 55 state protocol.State 56 execState state.ReadOnlyExecutionState 57 chunksConduit network.Conduit 58 metrics module.ExecutionMetrics 59 checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error) 60 chdpRequestHandler *engine.MessageHandler 61 chdpRequestQueue engine.MessageStore 62 63 // buffered channel for ChunkDataRequest workers to pick 64 // requests and process. 65 chdpRequestChannel chan *mempool.ChunkDataPackRequest 66 67 // timeout for delivery of a chunk data pack response in the network. 68 chunkDataPackDeliveryTimeout time.Duration 69 // timeout for querying chunk data pack through database. 70 chunkDataPackQueryTimeout time.Duration 71 } 72 73 func New( 74 logger zerolog.Logger, 75 tracer module.Tracer, 76 net network.Network, 77 state protocol.State, 78 execState state.ReadOnlyExecutionState, 79 metrics module.ExecutionMetrics, 80 checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error), 81 chunkDataPackRequestQueue engine.MessageStore, 82 chdpRequestWorkers uint, 83 chunkDataPackQueryTimeout time.Duration, 84 chunkDataPackDeliveryTimeout time.Duration, 85 ) (*Engine, error) { 86 87 log := logger.With().Str("engine", "receipts").Logger() 88 89 handler := engine.NewMessageHandler( 90 log, 91 engine.NewNotifier(), 92 engine.Pattern{ 93 // Match is called on every new message coming to this engine. 94 // Provider enigne only expects ChunkDataRequests. 95 // Other message types are discarded by Match. 96 Match: func(message *engine.Message) bool { 97 chdpReq, ok := message.Payload.(*messages.ChunkDataRequest) 98 if ok { 99 log.Info(). 100 Hex("chunk_id", logging.ID(chdpReq.ChunkID)). 101 Hex("requester_id", logging.ID(message.OriginID)). 102 Msg("chunk data pack request received") 103 } 104 return ok 105 }, 106 // Map is called on messages that are Match(ed) successfully, i.e., 107 // ChunkDataRequests. 108 // It replaces the payload of message with requested chunk id. 109 Map: func(message *engine.Message) (*engine.Message, bool) { 110 chdpReq := message.Payload.(*messages.ChunkDataRequest) 111 return &engine.Message{ 112 OriginID: message.OriginID, 113 Payload: chdpReq.ChunkID, 114 }, true 115 }, 116 Store: chunkDataPackRequestQueue, 117 }) 118 119 engine := Engine{ 120 log: log, 121 tracer: tracer, 122 state: state, 123 execState: execState, 124 metrics: metrics, 125 checkAuthorizedAtBlock: checkAuthorizedAtBlock, 126 chdpRequestHandler: handler, 127 chdpRequestQueue: chunkDataPackRequestQueue, 128 chdpRequestChannel: make(chan *mempool.ChunkDataPackRequest, chdpRequestWorkers), 129 chunkDataPackDeliveryTimeout: chunkDataPackDeliveryTimeout, 130 chunkDataPackQueryTimeout: chunkDataPackQueryTimeout, 131 } 132 133 var err error 134 135 engine.receiptCon, err = net.Register(channels.PushReceipts, &engine) 136 if err != nil { 137 return nil, fmt.Errorf("could not register receipt provider engine: %w", err) 138 } 139 140 chunksConduit, err := net.Register(channels.ProvideChunks, &engine) 141 if err != nil { 142 return nil, fmt.Errorf("could not register chunk data pack provider engine: %w", err) 143 } 144 engine.chunksConduit = chunksConduit 145 146 cm := component.NewComponentManagerBuilder() 147 cm.AddWorker(engine.processQueuedChunkDataPackRequestsShovelerWorker) 148 for i := uint(0); i < chdpRequestWorkers; i++ { 149 cm.AddWorker(engine.processChunkDataPackRequestWorker) 150 } 151 152 engine.cm = cm.Build() 153 engine.Component = engine.cm 154 155 return &engine, nil 156 } 157 158 // processQueuedChunkDataPackRequestsShovelerWorker is constantly listening on the MessageHandler for ChunkDataRequests, 159 // and pushes new ChunkDataRequests into the request channel to be picked by workers. 160 func (e *Engine) processQueuedChunkDataPackRequestsShovelerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 161 ready() 162 163 e.log.Debug().Msg("process chunk data pack request shovller worker started") 164 165 for { 166 select { 167 case <-e.chdpRequestHandler.GetNotifier(): 168 // there is at list a single chunk data pack request queued up. 169 e.processAvailableMesssages(ctx) 170 case <-ctx.Done(): 171 // close the internal channel, the workers will drain the channel before exiting 172 close(e.chdpRequestChannel) 173 e.log.Trace().Msg("processing chunk data pack request worker terminated") 174 return 175 } 176 } 177 } 178 179 // processAvailableMesssages is a blocking method that reads all queued ChunkDataRequests till the queue gets empty. 180 // Each ChunkDataRequest is processed by a single concurrent worker. However, there are limited number of such workers. 181 // If there is no worker available for a request, the method blocks till one is available. 182 func (e *Engine) processAvailableMesssages(ctx irrecoverable.SignalerContext) { 183 for { 184 select { 185 case <-ctx.Done(): 186 return 187 default: 188 } 189 190 msg, ok := e.chdpRequestQueue.Get() 191 if !ok { 192 // no more requests, return 193 return 194 } 195 196 chunkId, ok := msg.Payload.(flow.Identifier) 197 if !ok { 198 // should never happen. 199 // if it does happen, it means there is a bug in the queue implementation. 200 ctx.Throw(fmt.Errorf("invalid chunk id type in chunk data pack request queue: %T", msg.Payload)) 201 } 202 203 request := &mempool.ChunkDataPackRequest{ 204 RequesterId: msg.OriginID, 205 ChunkId: chunkId, 206 } 207 lg := e.log.With(). 208 Hex("chunk_id", logging.ID(request.ChunkId)). 209 Hex("origin_id", logging.ID(request.RequesterId)).Logger() 210 211 lg.Trace().Msg("shovller is queuing chunk data pack request for processing") 212 e.chdpRequestChannel <- request 213 lg.Trace().Msg("shovller queued up chunk data pack request for processing") 214 } 215 } 216 217 // processChunkDataPackRequestWorker encapsulates the logic of a single (concurrent) worker that picks a 218 // ChunkDataRequest from this engine's queue and processes it. 219 func (e *Engine) processChunkDataPackRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 220 ready() 221 222 for { 223 request, ok := <-e.chdpRequestChannel 224 if !ok { 225 e.log.Trace().Msg("processing chunk data pack request worker terminated") 226 return 227 } 228 lg := e.log.With(). 229 Hex("chunk_id", logging.ID(request.ChunkId)). 230 Hex("origin_id", logging.ID(request.RequesterId)).Logger() 231 lg.Trace().Msg("worker picked up chunk data pack request for processing") 232 e.onChunkDataRequest(request) 233 lg.Trace().Msg("worker finished chunk data pack processing") 234 } 235 } 236 237 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 238 select { 239 case <-e.cm.ShutdownSignal(): 240 e.log.Warn().Msgf("received message from %x after shut down", originID) 241 return nil 242 default: 243 } 244 245 err := e.chdpRequestHandler.Process(originID, event) 246 if err != nil { 247 if engine.IsIncompatibleInputTypeError(err) { 248 e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel) 249 return nil 250 } 251 return fmt.Errorf("unexpected error while processing engine message: %w", err) 252 } 253 254 return nil 255 } 256 257 // onChunkDataRequest receives a request for a chunk data pack, 258 // and if such a chunk data pack is available in the execution state, it is sent to the requester node. 259 // TODO improve error handling https://github.com/dapperlabs/flow-go/issues/6363 260 func (e *Engine) onChunkDataRequest(request *mempool.ChunkDataPackRequest) { 261 processStartTime := time.Now() 262 263 lg := e.log.With(). 264 Hex("origin_id", logging.ID(request.RequesterId)). 265 Hex("chunk_id", logging.ID(request.ChunkId)). 266 Logger() 267 lg.Info().Msg("started processing chunk data pack request") 268 269 // increases collector metric 270 e.metrics.ChunkDataPackRequestProcessed() 271 chunkDataPack, err := e.execState.ChunkDataPackByChunkID(request.ChunkId) 272 273 // we might be behind when we don't have the requested chunk. 274 // if this happen, log it and return nil 275 if errors.Is(err, storage.ErrNotFound) { 276 lg.Warn(). 277 Err(err). 278 Msg("chunk data pack not found, execution node may be behind") 279 return 280 } 281 if err != nil { 282 lg.Error(). 283 Err(err). 284 Msg("could not retrieve chunk ID from storage") 285 return 286 } 287 288 queryTime := time.Since(processStartTime) 289 lg = lg.With().Dur("query_time", queryTime).Logger() 290 if queryTime > e.chunkDataPackQueryTimeout { 291 lg.Warn(). 292 Dur("query_timout", e.chunkDataPackQueryTimeout). 293 Msg("chunk data pack query takes longer than expected timeout") 294 } 295 296 _, err = e.ensureAuthorized(chunkDataPack.ChunkID, request.RequesterId) 297 if err != nil { 298 lg.Error(). 299 Err(err). 300 Msg("could not verify authorization of identity of chunk data pack request") 301 return 302 } 303 304 e.deliverChunkDataResponse(chunkDataPack, request.RequesterId) 305 } 306 307 // deliverChunkDataResponse delivers chunk data pack to the requester through network. 308 func (e *Engine) deliverChunkDataResponse(chunkDataPack *flow.ChunkDataPack, requesterId flow.Identifier) { 309 lg := e.log.With(). 310 Hex("origin_id", logging.ID(requesterId)). 311 Hex("chunk_id", logging.ID(chunkDataPack.ChunkID)). 312 Logger() 313 lg.Info().Msg("sending chunk data pack response") 314 315 // sends requested chunk data pack to the requester 316 deliveryStartTime := time.Now() 317 318 response := &messages.ChunkDataResponse{ 319 ChunkDataPack: *chunkDataPack, 320 Nonce: rand.Uint64(), 321 } 322 323 err := e.chunksConduit.Unicast(response, requesterId) 324 if err != nil { 325 lg.Warn(). 326 Err(err). 327 Msg("could not send requested chunk data pack to requester") 328 return 329 } 330 331 deliveryTime := time.Since(deliveryStartTime) 332 lg = lg.With().Dur("delivery_time", deliveryTime).Logger() 333 if deliveryTime > e.chunkDataPackDeliveryTimeout { 334 lg.Warn(). 335 Dur("delivery_timout", e.chunkDataPackDeliveryTimeout). 336 Msg("chunk data pack delivery takes longer than expected timeout") 337 } 338 339 if chunkDataPack.Collection != nil { 340 // logging collection id of non-system chunks. 341 // A system chunk has both the collection and collection id set to nil. 342 lg = lg.With(). 343 Hex("collection_id", logging.ID(chunkDataPack.Collection.ID())). 344 Logger() 345 } 346 lg.Info().Msg("chunk data pack request successfully replied") 347 } 348 349 func (e *Engine) ensureAuthorized(chunkID flow.Identifier, originID flow.Identifier) (*flow.Identity, error) { 350 blockID, err := e.execState.GetBlockIDByChunkID(chunkID) 351 if err != nil { 352 return nil, engine.NewInvalidInputErrorf("cannot find blockID corresponding to chunk data pack: %w", err) 353 } 354 355 authorizedAt, err := e.checkAuthorizedAtBlock(blockID) 356 if err != nil { 357 return nil, engine.NewInvalidInputErrorf("cannot check block staking status: %w", err) 358 } 359 if !authorizedAt { 360 return nil, engine.NewInvalidInputErrorf("this node is not authorized at the block (%s) corresponding to chunk data pack (%s)", blockID.String(), chunkID.String()) 361 } 362 363 origin, err := e.state.AtBlockID(blockID).Identity(originID) 364 if err != nil { 365 return nil, engine.NewInvalidInputErrorf("invalid origin id (%s): %w", origin, err) 366 } 367 368 // only verifier nodes are allowed to request chunk data packs 369 if origin.Role != flow.RoleVerification { 370 return nil, engine.NewInvalidInputErrorf("invalid role for receiving collection: %s", origin.Role) 371 } 372 373 if origin.Weight == 0 { 374 return nil, engine.NewInvalidInputErrorf("node %s has zero weight at the block (%s) corresponding to chunk data pack (%s)", originID, blockID.String(), chunkID.String()) 375 } 376 return origin, nil 377 } 378 379 func (e *Engine) BroadcastExecutionReceipt(ctx context.Context, receipt *flow.ExecutionReceipt) error { 380 finalState, err := receipt.ExecutionResult.FinalStateCommitment() 381 if err != nil { 382 return fmt.Errorf("could not get final state: %w", err) 383 } 384 385 span, _ := e.tracer.StartSpanFromContext(ctx, trace.EXEBroadcastExecutionReceipt) 386 defer span.End() 387 388 e.log.Debug(). 389 Hex("block_id", logging.ID(receipt.ExecutionResult.BlockID)). 390 Hex("receipt_id", logging.Entity(receipt)). 391 Hex("final_state", finalState[:]). 392 Msg("broadcasting execution receipt") 393 394 identities, err := e.state.Final().Identities(filter.HasRole(flow.RoleAccess, flow.RoleConsensus, 395 flow.RoleVerification)) 396 if err != nil { 397 return fmt.Errorf("could not get consensus and verification identities: %w", err) 398 } 399 400 err = e.receiptCon.Publish(receipt, identities.NodeIDs()...) 401 if err != nil { 402 return fmt.Errorf("could not submit execution receipts: %w", err) 403 } 404 405 return nil 406 }