github.com/onflow/flow-go@v0.33.17/engine/execution/ingestion/engine.go (about) 1 package ingestion 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "sync" 8 "time" 9 10 "github.com/rs/zerolog" 11 "github.com/rs/zerolog/log" 12 13 "github.com/onflow/flow-go/engine" 14 "github.com/onflow/flow-go/engine/execution" 15 "github.com/onflow/flow-go/engine/execution/computation" 16 "github.com/onflow/flow-go/engine/execution/ingestion/stop" 17 "github.com/onflow/flow-go/engine/execution/ingestion/uploader" 18 "github.com/onflow/flow-go/engine/execution/provider" 19 "github.com/onflow/flow-go/engine/execution/state" 20 "github.com/onflow/flow-go/model/flow" 21 "github.com/onflow/flow-go/module" 22 "github.com/onflow/flow-go/module/executiondatasync/pruner" 23 "github.com/onflow/flow-go/module/mempool/entity" 24 "github.com/onflow/flow-go/module/mempool/queue" 25 "github.com/onflow/flow-go/module/mempool/stdmap" 26 "github.com/onflow/flow-go/module/trace" 27 "github.com/onflow/flow-go/network" 28 "github.com/onflow/flow-go/network/channels" 29 psEvents "github.com/onflow/flow-go/state/protocol/events" 30 "github.com/onflow/flow-go/storage" 31 "github.com/onflow/flow-go/utils/logging" 32 ) 33 34 // An Engine receives and saves incoming blocks. 35 type Engine struct { 36 psEvents.Noop // satisfy protocol events consumer interface 37 38 unit *engine.Unit 39 log zerolog.Logger 40 me module.Local 41 collectionFetcher CollectionFetcher 42 headers storage.Headers // see comments on getHeaderByHeight for why we need it 43 blocks storage.Blocks 44 collections storage.Collections 45 computationManager computation.ComputationManager 46 providerEngine provider.ProviderEngine 47 mempool *Mempool 48 execState state.ExecutionState 49 metrics module.ExecutionMetrics 50 maxCollectionHeight uint64 51 tracer module.Tracer 52 extensiveLogging bool 53 executionDataPruner *pruner.Pruner 54 uploader *uploader.Manager 55 stopControl *stop.StopControl 56 loader BlockLoader 57 } 58 59 func New( 60 unit *engine.Unit, 61 logger zerolog.Logger, 62 net network.EngineRegistry, 63 me module.Local, 64 collectionFetcher CollectionFetcher, 65 headers storage.Headers, 66 blocks storage.Blocks, 67 collections storage.Collections, 68 executionEngine computation.ComputationManager, 69 providerEngine provider.ProviderEngine, 70 execState state.ExecutionState, 71 metrics module.ExecutionMetrics, 72 tracer module.Tracer, 73 extLog bool, 74 pruner *pruner.Pruner, 75 uploader *uploader.Manager, 76 stopControl *stop.StopControl, 77 loader BlockLoader, 78 ) (*Engine, error) { 79 log := logger.With().Str("engine", "ingestion").Logger() 80 81 mempool := newMempool() 82 83 eng := Engine{ 84 unit: unit, 85 log: log, 86 me: me, 87 collectionFetcher: collectionFetcher, 88 headers: headers, 89 blocks: blocks, 90 collections: collections, 91 computationManager: executionEngine, 92 providerEngine: providerEngine, 93 mempool: mempool, 94 execState: execState, 95 metrics: metrics, 96 maxCollectionHeight: 0, 97 tracer: tracer, 98 extensiveLogging: extLog, 99 executionDataPruner: pruner, 100 uploader: uploader, 101 stopControl: stopControl, 102 loader: loader, 103 } 104 105 return &eng, nil 106 } 107 108 // Ready returns a channel that will close when the engine has 109 // successfully started. 110 func (e *Engine) Ready() <-chan struct{} { 111 if e.stopControl.IsExecutionStopped() { 112 return e.unit.Ready() 113 } 114 115 if err := e.uploader.RetryUploads(); err != nil { 116 e.log.Warn().Msg("failed to re-upload all ComputationResults") 117 } 118 119 err := e.reloadUnexecutedBlocks() 120 if err != nil { 121 e.log.Fatal().Err(err).Msg("failed to load all unexecuted blocks") 122 } 123 124 return e.unit.Ready() 125 } 126 127 // Done returns a channel that will close when the engine has 128 // successfully stopped. 129 func (e *Engine) Done() <-chan struct{} { 130 return e.unit.Done() 131 } 132 133 // SubmitLocal submits an event originating on the local node. 134 func (e *Engine) SubmitLocal(event interface{}) { 135 e.unit.Launch(func() { 136 err := e.process(e.me.NodeID(), event) 137 if err != nil { 138 engine.LogError(e.log, err) 139 } 140 }) 141 } 142 143 // Submit submits the given event from the node with the given origin ID 144 // for processing in a non-blocking manner. It returns instantly and logs 145 // a potential processing error internally when done. 146 func (e *Engine) Submit( 147 channel channels.Channel, 148 originID flow.Identifier, 149 event interface{}, 150 ) { 151 e.unit.Launch(func() { 152 err := e.process(originID, event) 153 if err != nil { 154 engine.LogError(e.log, err) 155 } 156 }) 157 } 158 159 // ProcessLocal processes an event originating on the local node. 160 func (e *Engine) ProcessLocal(event interface{}) error { 161 return fmt.Errorf("ingestion error does not process local events") 162 } 163 164 func (e *Engine) Process( 165 channel channels.Channel, 166 originID flow.Identifier, 167 event interface{}, 168 ) error { 169 return e.unit.Do(func() error { 170 return e.process(originID, event) 171 }) 172 } 173 174 func (e *Engine) process(originID flow.Identifier, event interface{}) error { 175 return nil 176 } 177 178 // on nodes startup, we need to load all the unexecuted blocks to the execution queues. 179 // blocks have to be loaded in the way that the parent has been loaded before loading its children 180 func (e *Engine) reloadUnexecutedBlocks() error { 181 unexecuted, err := e.loader.LoadUnexecuted(e.unit.Ctx()) 182 if err != nil { 183 return fmt.Errorf("could not load unexecuted blocks: %w", err) 184 } 185 // it's possible the BlockProcessable is called during the reloading, as the follower engine 186 // will receive blocks before ingestion engine is ready. 187 // The problem with that is, since the reloading hasn't finished yet, enqueuing the new block from 188 // the BlockProcessable callback will fail, because its parent block might have not been reloaded 189 // to the queues yet. 190 // So one solution here is to lock the execution queues during reloading, so that if BlockProcessable 191 // is called before reloading is finished, it will be blocked, which will avoid that edge case. 192 return e.mempool.Run(func( 193 blockByCollection *stdmap.BlockByCollectionBackdata, 194 executionQueues *stdmap.QueuesBackdata, 195 ) error { 196 for _, blockID := range unexecuted { 197 err := e.reloadBlock(blockByCollection, executionQueues, blockID) 198 if err != nil { 199 return fmt.Errorf("could not reload block: %v, %w", blockID, err) 200 } 201 202 e.log.Debug().Hex("block_id", blockID[:]).Msg("reloaded block") 203 } 204 205 e.log.Info().Int("count", len(unexecuted)).Msg("all unexecuted have been successfully reloaded") 206 207 return nil 208 }) 209 } 210 211 func (e *Engine) reloadBlock( 212 blockByCollection *stdmap.BlockByCollectionBackdata, 213 executionQueues *stdmap.QueuesBackdata, 214 blockID flow.Identifier, 215 ) error { 216 block, err := e.blocks.ByID(blockID) 217 if err != nil { 218 return fmt.Errorf("could not get block by ID: %v %w", blockID, err) 219 } 220 221 // enqueue the block and check if there is any missing collections 222 missingCollections, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false) 223 224 if err != nil { 225 return fmt.Errorf("could not enqueue block %x on reloading: %w", blockID, err) 226 } 227 228 // forward the missing collections to requester engine for requesting them from collection nodes, 229 // adding the missing collections to mempool in order to trigger the block execution as soon as 230 // all missing collections are received. 231 err = e.fetchAndHandleCollection(blockID, block.Header.Height, missingCollections, func(collection *flow.Collection) error { 232 err := e.addCollectionToMempool(collection, blockByCollection) 233 234 if err != nil { 235 return fmt.Errorf("could not add collection to mempool: %w", err) 236 } 237 return nil 238 }) 239 240 if err != nil { 241 return fmt.Errorf("could not fetch or handle collection %w", err) 242 } 243 return nil 244 } 245 246 // BlockProcessable handles the new verified blocks (blocks that 247 // have passed consensus validation) received from the consensus nodes 248 // NOTE: BlockProcessable might be called multiple times for the same block. 249 // NOTE: Ready calls reloadUnexecutedBlocks during initialization, which handles dropped protocol events. 250 func (e *Engine) BlockProcessable(b *flow.Header, _ *flow.QuorumCertificate) { 251 252 // TODO: this should not be blocking: https://github.com/onflow/flow-go/issues/4400 253 254 // skip if stopControl tells to skip 255 if !e.stopControl.ShouldExecuteBlock(b) { 256 return 257 } 258 259 blockID := b.ID() 260 newBlock, err := e.blocks.ByID(blockID) 261 if err != nil { 262 e.log.Fatal().Err(err).Msgf("could not get incorporated block(%v): %v", blockID, err) 263 } 264 265 e.log.Info().Hex("block_id", blockID[:]). 266 Uint64("height", b.Height). 267 Msg("handling new block") 268 269 err = e.handleBlock(e.unit.Ctx(), newBlock) 270 if err != nil { 271 e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to handle block") 272 } 273 } 274 275 // Main handling 276 277 // handle block will process the incoming block. 278 // the block has passed the consensus validation. 279 func (e *Engine) handleBlock(ctx context.Context, block *flow.Block) error { 280 281 blockID := block.ID() 282 log := e.log.With().Hex("block_id", blockID[:]).Logger() 283 284 span, _ := e.tracer.StartBlockSpan(ctx, blockID, trace.EXEHandleBlock) 285 defer span.End() 286 287 executed, err := e.execState.IsBlockExecuted(block.Header.Height, blockID) 288 if err != nil { 289 return fmt.Errorf("could not check whether block is executed: %w", err) 290 } 291 292 if executed { 293 log.Debug().Msg("block has been executed already") 294 return nil 295 } 296 297 var missingCollections []*flow.CollectionGuarantee 298 // unexecuted block 299 // acquiring the lock so that there is only one process modifying the queue 300 err = e.mempool.Run(func( 301 blockByCollection *stdmap.BlockByCollectionBackdata, 302 executionQueues *stdmap.QueuesBackdata, 303 ) error { 304 missing, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false) 305 if err != nil { 306 return err 307 } 308 missingCollections = missing 309 return nil 310 }) 311 312 if err != nil { 313 return fmt.Errorf("could not enqueue block %v: %w", blockID, err) 314 } 315 316 return e.addOrFetch(blockID, block.Header.Height, missingCollections) 317 } 318 319 func (e *Engine) enqueueBlockAndCheckExecutable( 320 blockByCollection *stdmap.BlockByCollectionBackdata, 321 executionQueues *stdmap.QueuesBackdata, 322 block *flow.Block, 323 checkStateSync bool, 324 ) ([]*flow.CollectionGuarantee, error) { 325 executableBlock := &entity.ExecutableBlock{ 326 Block: block, 327 CompleteCollections: make(map[flow.Identifier]*entity.CompleteCollection), 328 } 329 330 blockID := executableBlock.ID() 331 332 lg := e.log.With(). 333 Hex("block_id", blockID[:]). 334 Uint64("block_height", executableBlock.Block.Header.Height). 335 Logger() 336 337 // adding the block to the queue, 338 queue, added, head := enqueue(executableBlock, executionQueues) 339 340 // if it's not added, it means the block is not a new block, it already 341 // exists in the queue, then bail 342 if !added { 343 log.Debug().Hex("block_id", logging.Entity(executableBlock)). 344 Int("block_height", int(executableBlock.Height())). 345 Msg("block already exists in the execution queue") 346 return nil, nil 347 } 348 349 firstUnexecutedHeight := queue.Head.Item.Height() 350 351 // check if a block is executable. 352 // a block is executable if the following conditions are all true 353 // 1) the parent state commitment is ready 354 // 2) the collections for the block payload are ready 355 // 3) the child block is ready for querying the randomness 356 357 // check if the block's parent has been executed. (we can't execute the block if the parent has 358 // not been executed yet) 359 // check if there is a statecommitment for the parent block 360 parentCommitment, err := e.execState.StateCommitmentByBlockID(block.Header.ParentID) 361 362 // if we found the statecommitment for the parent block, then add it to the executable block. 363 if err == nil { 364 executableBlock.StartState = &parentCommitment 365 } else if errors.Is(err, storage.ErrNotFound) { 366 // the parent block is an unexecuted block. 367 // if the queue only has one block, and its parent doesn't 368 // exist in the queue, then we need to load the block from the storage. 369 _, ok := queue.Nodes[blockID] 370 if !ok { 371 lg.Error().Msgf("an unexecuted parent block is missing in the queue") 372 } 373 } else { 374 // if there is exception, then crash 375 lg.Fatal().Err(err).Msg("unexpected error while accessing storage, shutting down") 376 } 377 378 // check if we have all the collections for the block, and request them if there is missing. 379 missingCollections, err := e.matchAndFindMissingCollections(executableBlock, blockByCollection) 380 if err != nil { 381 return nil, fmt.Errorf("cannot send collection requests: %w", err) 382 } 383 384 complete := false 385 386 // if newly enqueued block is inside any existing queue, we should skip now and wait 387 // for parent to finish execution 388 if head { 389 // execute the block if the block is ready to be executed 390 complete = e.executeBlockIfComplete(executableBlock) 391 } 392 393 lg.Info(). 394 // if the execution is halt, but the queue keeps growing, we could check which block 395 // hasn't been executed. 396 Uint64("first_unexecuted_in_queue", firstUnexecutedHeight). 397 Bool("complete", complete). 398 Bool("head_of_queue", head). 399 Int("cols", len(executableBlock.Block.Payload.Guarantees)). 400 Int("missing_cols", len(missingCollections)). 401 Msg("block is enqueued") 402 403 return missingCollections, nil 404 } 405 406 // executeBlock will execute the block. 407 // When finish executing, it will check if the children becomes executable and execute them if yes. 408 func (e *Engine) executeBlock( 409 ctx context.Context, 410 executableBlock *entity.ExecutableBlock, 411 ) { 412 lg := e.log.With(). 413 Hex("block_id", logging.Entity(executableBlock)). 414 Uint64("height", executableBlock.Block.Header.Height). 415 Int("collections", len(executableBlock.CompleteCollections)). 416 Logger() 417 418 lg.Info().Msg("executing block") 419 420 startedAt := time.Now() 421 422 span, ctx := e.tracer.StartSpanFromContext(ctx, trace.EXEExecuteBlock) 423 defer span.End() 424 425 parentID := executableBlock.Block.Header.ParentID 426 parentErID, err := e.execState.GetExecutionResultID(ctx, parentID) 427 if err != nil { 428 lg.Err(err). 429 Str("parentID", parentID.String()). 430 Msg("could not get execution result ID for parent block") 431 return 432 } 433 434 snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState, 435 executableBlock.Block.Header.ParentID, 436 executableBlock.Block.Header.Height-1, 437 ) 438 439 computationResult, err := e.computationManager.ComputeBlock( 440 ctx, 441 parentErID, 442 executableBlock, 443 snapshot) 444 if err != nil { 445 lg.Err(err).Msg("error while computing block") 446 return 447 } 448 449 wg := sync.WaitGroup{} 450 wg.Add(1) 451 defer wg.Wait() 452 453 go func() { 454 defer wg.Done() 455 err := e.uploader.Upload(ctx, computationResult) 456 if err != nil { 457 lg.Err(err).Msg("error while uploading block") 458 // continue processing. uploads should not block execution 459 } 460 }() 461 462 err = e.saveExecutionResults(ctx, computationResult) 463 if errors.Is(err, storage.ErrDataMismatch) { 464 lg.Fatal().Err(err).Msg("fatal: trying to store different results for the same block") 465 } 466 467 if err != nil { 468 lg.Err(err).Msg("error while handing computation results") 469 return 470 } 471 472 receipt := computationResult.ExecutionReceipt 473 broadcasted, err := e.providerEngine.BroadcastExecutionReceipt( 474 ctx, executableBlock.Block.Header.Height, receipt) 475 if err != nil { 476 lg.Err(err).Msg("critical: failed to broadcast the receipt") 477 } 478 479 finalEndState := computationResult.CurrentEndState() 480 lg.Info(). 481 Hex("parent_block", executableBlock.Block.Header.ParentID[:]). 482 Int("collections", len(executableBlock.Block.Payload.Guarantees)). 483 Hex("start_state", executableBlock.StartState[:]). 484 Hex("final_state", finalEndState[:]). 485 Hex("receipt_id", logging.Entity(receipt)). 486 Hex("result_id", logging.Entity(receipt.ExecutionResult)). 487 Hex("execution_data_id", receipt.ExecutionResult.ExecutionDataID[:]). 488 Bool("state_changed", finalEndState != *executableBlock.StartState). 489 Uint64("num_txs", nonSystemTransactionCount(receipt.ExecutionResult)). 490 Bool("broadcasted", broadcasted). 491 Int64("timeSpentInMS", time.Since(startedAt).Milliseconds()). 492 Msg("block executed") 493 494 err = e.onBlockExecuted(executableBlock, finalEndState) 495 if err != nil { 496 lg.Err(err).Msg("failed in process block's children") 497 } 498 499 if e.executionDataPruner != nil { 500 e.executionDataPruner.NotifyFulfilledHeight(executableBlock.Height()) 501 } 502 503 e.stopControl.OnBlockExecuted(executableBlock.Block.Header) 504 505 e.unit.Ctx() 506 507 } 508 509 func nonSystemTransactionCount(result flow.ExecutionResult) uint64 { 510 count := uint64(0) 511 for _, chunk := range result.Chunks { 512 count += chunk.NumberOfTransactions 513 } 514 return count 515 } 516 517 // we've executed the block, now we need to check: 518 // 1. whether the state syncing can be turned off 519 // 2. whether its children can be executed 520 // the executionQueues stores blocks as a tree: 521 // 522 // 10 <- 11 <- 12 523 // ^-- 13 524 // 14 <- 15 <- 16 525 // 526 // if block 10 is the one just executed, then we will remove it from the queue, and add 527 // its children back, meaning the tree will become: 528 // 529 // 11 <- 12 530 // 13 531 // 14 <- 15 <- 16 532 533 func (e *Engine) onBlockExecuted( 534 executed *entity.ExecutableBlock, 535 finalState flow.StateCommitment, 536 ) error { 537 538 e.metrics.ExecutionStorageStateCommitment(int64(len(finalState))) 539 e.metrics.ExecutionLastExecutedBlockHeight(executed.Block.Header.Height) 540 541 missingCollections := make(map[*entity.ExecutableBlock][]*flow.CollectionGuarantee) 542 err := e.mempool.Run( 543 func( 544 blockByCollection *stdmap.BlockByCollectionBackdata, 545 executionQueues *stdmap.QueuesBackdata, 546 ) error { 547 // find the block that was just executed 548 executionQueue, exists := executionQueues.ByID(executed.ID()) 549 if !exists { 550 logQueueState(e.log, executionQueues, executed.ID()) 551 // when the block no longer exists in the queue, it means there was a race condition that 552 // two onBlockExecuted was called for the same block, and one process has already removed the 553 // block from the queue, so we will print an error here 554 return fmt.Errorf("block has been executed already, no longer exists in the queue") 555 } 556 557 // dismount the executed block and all its children 558 _, newQueues := executionQueue.Dismount() 559 560 // go through each children, add them back to the queue, and check 561 // if the children is executable 562 for _, queue := range newQueues { 563 queueID := queue.ID() 564 added := executionQueues.Add(queueID, queue) 565 if !added { 566 // blocks should be unique in execution queues, if we dismount all the children blocks, then 567 // add it back to the queues, then it should always be able to add. 568 // If not, then there is a bug that the queues have duplicated blocks 569 return fmt.Errorf("fatal error - child block already in execution queue") 570 } 571 572 // the parent block has been executed, update the StartState of 573 // each child block. 574 child := queue.Head.Item.(*entity.ExecutableBlock) 575 child.StartState = &finalState 576 577 missing, err := e.matchAndFindMissingCollections(child, blockByCollection) 578 if err != nil { 579 return fmt.Errorf("cannot send collection requests: %w", err) 580 } 581 if len(missing) > 0 { 582 missingCollections[child] = append(missingCollections[child], missing...) 583 } 584 585 completed := e.executeBlockIfComplete(child) 586 if !completed { 587 e.log.Debug(). 588 Hex("executed_block", logging.Entity(executed)). 589 Hex("child_block", logging.Entity(child)). 590 Msg("child block is not ready to be executed yet") 591 } else { 592 e.log.Debug(). 593 Hex("executed_block", logging.Entity(executed)). 594 Hex("child_block", logging.Entity(child)). 595 Msg("child block is ready to be executed") 596 } 597 } 598 599 // remove the executed block 600 executionQueues.Remove(executed.ID()) 601 602 return nil 603 }) 604 605 if err != nil { 606 e.log.Fatal().Err(err). 607 Hex("block", logging.Entity(executed)). 608 Uint64("height", executed.Block.Header.Height). 609 Msg("error while requeueing blocks after execution") 610 } 611 612 for child, missing := range missingCollections { 613 err := e.addOrFetch(child.ID(), child.Block.Header.Height, missing) 614 if err != nil { 615 return fmt.Errorf("fail to add missing collections: %w", err) 616 } 617 } 618 619 return nil 620 } 621 622 // executeBlockIfComplete checks whether the block is ready to be executed. 623 // if yes, execute the block 624 // return a bool indicates whether the block was completed 625 func (e *Engine) executeBlockIfComplete(eb *entity.ExecutableBlock) bool { 626 627 if eb.Executing { 628 return false 629 } 630 631 // if don't have the delta, then check if everything is ready for executing 632 // the block 633 if eb.IsComplete() { 634 635 if e.extensiveLogging { 636 e.logExecutableBlock(eb) 637 } 638 639 // no external synchronisation is used because this method must be run in a thread-safe context 640 eb.Executing = true 641 642 e.unit.Launch(func() { 643 e.executeBlock(e.unit.Ctx(), eb) 644 }) 645 return true 646 } 647 return false 648 } 649 650 // OnCollection is a callback for handling the collections requested by the 651 // collection requester. 652 func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) { 653 // convert entity to strongly typed collection 654 collection, ok := entity.(*flow.Collection) 655 if !ok { 656 e.log.Error().Msgf("invalid entity type (%T)", entity) 657 return 658 } 659 660 // no need to validate the origin ID, since the collection requester has 661 // checked the origin must be a collection node. 662 663 err := e.handleCollection(originID, collection) 664 if err != nil { 665 e.log.Error().Err(err).Msg("could not handle collection") 666 } 667 } 668 669 // a block can't be executed if its collection is missing. 670 // since a collection can belong to multiple blocks, we need to 671 // find all the blocks that are needing this collection, and then 672 // check if any of these block becomes executable and execute it if 673 // is. 674 func (e *Engine) handleCollection( 675 originID flow.Identifier, 676 collection *flow.Collection, 677 ) error { 678 collID := collection.ID() 679 680 span, _ := e.tracer.StartCollectionSpan(context.Background(), collID, trace.EXEHandleCollection) 681 defer span.End() 682 683 lg := e.log.With().Hex("collection_id", collID[:]).Logger() 684 685 lg.Info().Hex("sender", originID[:]).Int("len", collection.Len()).Msg("handle collection") 686 defer func(startTime time.Time) { 687 lg.Info().TimeDiff("duration", time.Now(), startTime).Msg("collection handled") 688 }(time.Now()) 689 690 // TODO: bail if have seen this collection before. 691 err := e.collections.Store(collection) 692 if err != nil { 693 return fmt.Errorf("cannot store collection: %w", err) 694 } 695 696 return e.mempool.BlockByCollection.Run( 697 func(backdata *stdmap.BlockByCollectionBackdata) error { 698 return e.addCollectionToMempool(collection, backdata) 699 }, 700 ) 701 } 702 703 func (e *Engine) addCollectionToMempool( 704 collection *flow.Collection, 705 backdata *stdmap.BlockByCollectionBackdata, 706 ) error { 707 collID := collection.ID() 708 blockByCollectionID, exists := backdata.ByID(collID) 709 710 // if we don't find any block for this collection, then 711 // means we don't need this collection any more. 712 // or it was ejected from the mempool when it was full. 713 // either way, we will return 714 if !exists { 715 return nil 716 } 717 718 for _, executableBlock := range blockByCollectionID.ExecutableBlocks { 719 blockID := executableBlock.ID() 720 721 completeCollection, ok := executableBlock.CompleteCollections[collID] 722 if !ok { 723 return fmt.Errorf("cannot handle collection: internal inconsistency - collection pointing to block %v which does not contain said collection", 724 blockID) 725 } 726 727 // record collection max height metrics 728 blockHeight := executableBlock.Block.Header.Height 729 if blockHeight > e.maxCollectionHeight { 730 e.metrics.UpdateCollectionMaxHeight(blockHeight) 731 e.maxCollectionHeight = blockHeight 732 } 733 734 if completeCollection.IsCompleted() { 735 // already received transactions for this collection 736 continue 737 } 738 739 // update the transactions of the collection 740 // Note: it's guaranteed the transactions are for this collection, because 741 // the collection id matches with the CollectionID from the collection guarantee 742 completeCollection.Transactions = collection.Transactions 743 744 // check if the block becomes executable 745 _ = e.executeBlockIfComplete(executableBlock) 746 } 747 748 // since we've received this collection, remove it from the index 749 // this also prevents from executing the same block twice, because the second 750 // time when the collection arrives, it will not be found in the blockByCollectionID 751 // index. 752 backdata.Remove(collID) 753 754 return nil 755 } 756 757 func newQueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) ( 758 *queue.Queue, 759 bool, 760 ) { 761 q := queue.NewQueue(blockify) 762 qID := q.ID() 763 return q, queues.Add(qID, q) 764 } 765 766 // enqueue adds a block to the queues, return the queue that includes the block and booleans 767 // * is block new one (it's not already enqueued, not a duplicate) 768 // * is head of the queue (new queue has been created) 769 // 770 // Queues are chained blocks. Since a block can't be executable until its parent has been 771 // executed, the chained structure allows us to only check the head of each queue to see if 772 // any block becomes executable. 773 // for instance we have one queue whose head is A: 774 // 775 // A <- B <- C 776 // ^- D <- E 777 // 778 // If we receive E <- F, then we will add it to the queue: 779 // 780 // A <- B <- C 781 // ^- D <- E <- F 782 // 783 // Even through there are 6 blocks, we only need to check if block A becomes executable. 784 // when the parent block isn't in the queue, we add it as a new queue. for instance, if 785 // we receive H <- G, then the queues will become: 786 // 787 // A <- B <- C 788 // ^- D <- E 789 // G 790 func enqueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) ( 791 *queue.Queue, 792 bool, 793 bool, 794 ) { 795 for _, queue := range queues.All() { 796 if stored, isNew := queue.TryAdd(blockify); stored { 797 return queue, isNew, false 798 } 799 } 800 queue, isNew := newQueue(blockify, queues) 801 return queue, isNew, true 802 } 803 804 // check if the block's collections have been received, 805 // if yes, add the collection to the executable block 806 // if no, fetch the collection. 807 // if a block has 3 collection, it would be 3 reqs to fetch them. 808 // mark the collection belongs to the block, 809 // mark the block contains this collection. 810 // It returns the missing collections to be fetched 811 // TODO: to rename 812 func (e *Engine) matchAndFindMissingCollections( 813 executableBlock *entity.ExecutableBlock, 814 collectionsBackdata *stdmap.BlockByCollectionBackdata, 815 ) ([]*flow.CollectionGuarantee, error) { 816 missingCollections := make([]*flow.CollectionGuarantee, 0, len(executableBlock.Block.Payload.Guarantees)) 817 818 for _, guarantee := range executableBlock.Block.Payload.Guarantees { 819 coll := &entity.CompleteCollection{ 820 Guarantee: guarantee, 821 } 822 executableBlock.CompleteCollections[guarantee.ID()] = coll 823 824 // check if we have requested this collection before. 825 // blocksNeedingCollection stores all the blocks that contain this collection 826 827 if blocksNeedingCollection, exists := collectionsBackdata.ByID(guarantee.ID()); exists { 828 // if we've requested this collection, it means other block might also contain this collection. 829 // in this case, add this block to the map so that when the collection is received, 830 // we could update the executable block 831 blocksNeedingCollection.ExecutableBlocks[executableBlock.ID()] = executableBlock 832 833 // since the collection is still being requested, we don't have the transactions 834 // yet, so exit 835 continue 836 } 837 838 // the storage doesn't have this collection, meaning this is our first time seeing this 839 // collection guarantee, create an entry to store in collectionsBackdata in order to 840 // update the executable blocks when the collection is received. 841 blocksNeedingCollection := &entity.BlocksByCollection{ 842 CollectionID: guarantee.ID(), 843 ExecutableBlocks: map[flow.Identifier]*entity.ExecutableBlock{executableBlock.ID(): executableBlock}, 844 } 845 846 added := collectionsBackdata.Add(blocksNeedingCollection.ID(), blocksNeedingCollection) 847 if !added { 848 // sanity check, should not happen, unless mempool implementation has a bug 849 return nil, fmt.Errorf("collection already mapped to block") 850 } 851 852 missingCollections = append(missingCollections, guarantee) 853 } 854 855 return missingCollections, nil 856 } 857 858 // save the execution result of a block 859 func (e *Engine) saveExecutionResults( 860 ctx context.Context, 861 result *execution.ComputationResult, 862 ) error { 863 span, childCtx := e.tracer.StartSpanFromContext(ctx, trace.EXESaveExecutionResults) 864 defer span.End() 865 866 e.log.Debug(). 867 Hex("block_id", logging.Entity(result.ExecutableBlock)). 868 Msg("received computation result") 869 870 for _, event := range result.ExecutionResult.ServiceEvents { 871 e.log.Info(). 872 Uint64("block_height", result.ExecutableBlock.Height()). 873 Hex("block_id", logging.Entity(result.ExecutableBlock)). 874 Str("event_type", event.Type.String()). 875 Msg("service event emitted") 876 } 877 878 err := e.execState.SaveExecutionResults(childCtx, result) 879 if err != nil { 880 return fmt.Errorf("cannot persist execution state: %w", err) 881 } 882 883 finalEndState := result.CurrentEndState() 884 e.log.Debug(). 885 Hex("block_id", logging.Entity(result.ExecutableBlock)). 886 Hex("start_state", result.ExecutableBlock.StartState[:]). 887 Hex("final_state", finalEndState[:]). 888 Msg("saved computation results") 889 890 return nil 891 } 892 893 // logExecutableBlock logs all data about an executable block 894 // over time we should skip this 895 func (e *Engine) logExecutableBlock(eb *entity.ExecutableBlock) { 896 // log block 897 e.log.Debug(). 898 Hex("block_id", logging.Entity(eb)). 899 Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)). 900 Uint64("block_height", eb.Block.Header.Height). 901 Int("number_of_collections", len(eb.Collections())). 902 RawJSON("block_header", logging.AsJSON(eb.Block.Header)). 903 Msg("extensive log: block header") 904 905 // logs transactions 906 for i, col := range eb.Collections() { 907 for j, tx := range col.Transactions { 908 e.log.Debug(). 909 Hex("block_id", logging.Entity(eb)). 910 Int("block_height", int(eb.Block.Header.Height)). 911 Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)). 912 Int("collection_index", i). 913 Int("tx_index", j). 914 Hex("collection_id", logging.ID(col.Guarantee.CollectionID)). 915 Hex("tx_hash", logging.Entity(tx)). 916 Hex("start_state_commitment", eb.StartState[:]). 917 RawJSON("transaction", logging.AsJSON(tx)). 918 Msg("extensive log: executed tx content") 919 } 920 } 921 } 922 923 // addOrFetch checks if there are stored collections for the given guarantees, if there is, 924 // forward them to mempool to process the collection, otherwise fetch the collections. 925 // any error returned are exception 926 func (e *Engine) addOrFetch( 927 blockID flow.Identifier, 928 height uint64, 929 guarantees []*flow.CollectionGuarantee, 930 ) error { 931 return e.fetchAndHandleCollection(blockID, height, guarantees, func(collection *flow.Collection) error { 932 err := e.mempool.BlockByCollection.Run( 933 func(backdata *stdmap.BlockByCollectionBackdata) error { 934 return e.addCollectionToMempool(collection, backdata) 935 }) 936 937 if err != nil { 938 return fmt.Errorf("could not add collection to mempool: %w", err) 939 } 940 return nil 941 }) 942 } 943 944 // addOrFetch checks if there are stored collections for the given guarantees, if there is, 945 // forward them to the handler to process the collection, otherwise fetch the collections. 946 // any error returned are exception 947 func (e *Engine) fetchAndHandleCollection( 948 blockID flow.Identifier, 949 height uint64, 950 guarantees []*flow.CollectionGuarantee, 951 handleCollection func(*flow.Collection) error, 952 ) error { 953 fetched := false 954 for _, guarantee := range guarantees { 955 // if we've requested this collection, we will store it in the storage, 956 // so check the storage to see whether we've seen it. 957 collection, err := e.collections.ByID(guarantee.CollectionID) 958 959 if err == nil { 960 // we found the collection from storage, forward this collection to handler 961 err = handleCollection(collection) 962 if err != nil { 963 return fmt.Errorf("could not handle collection: %w", err) 964 } 965 966 continue 967 } 968 969 // check if there was exception 970 if !errors.Is(err, storage.ErrNotFound) { 971 return fmt.Errorf("error while querying for collection: %w", err) 972 } 973 974 err = e.collectionFetcher.FetchCollection(blockID, height, guarantee) 975 if err != nil { 976 return fmt.Errorf("could not fetch collection: %w", err) 977 } 978 fetched = true 979 } 980 981 // make sure that the requests are dispatched immediately by the requester 982 if fetched { 983 e.collectionFetcher.Force() 984 e.metrics.ExecutionCollectionRequestSent() 985 } 986 987 return nil 988 } 989 990 func logQueueState(log zerolog.Logger, queues *stdmap.QueuesBackdata, blockID flow.Identifier) { 991 all := queues.All() 992 993 log.With().Hex("queue_state__executed_block_id", blockID[:]).Int("count", len(all)).Logger() 994 for i, queue := range all { 995 log.Error().Msgf("%v-th queue state: %v", i, queue.String()) 996 } 997 }