github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/execution/ingestion/engine.go (about) 1 package ingestion 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "sync" 8 "time" 9 10 "github.com/rs/zerolog" 11 "github.com/rs/zerolog/log" 12 13 "github.com/onflow/flow-go/engine" 14 "github.com/onflow/flow-go/engine/execution" 15 "github.com/onflow/flow-go/engine/execution/computation" 16 "github.com/onflow/flow-go/engine/execution/ingestion/stop" 17 "github.com/onflow/flow-go/engine/execution/ingestion/uploader" 18 "github.com/onflow/flow-go/engine/execution/provider" 19 "github.com/onflow/flow-go/engine/execution/state" 20 "github.com/onflow/flow-go/model/flow" 21 "github.com/onflow/flow-go/module" 22 "github.com/onflow/flow-go/module/executiondatasync/pruner" 23 "github.com/onflow/flow-go/module/mempool/entity" 24 "github.com/onflow/flow-go/module/mempool/queue" 25 "github.com/onflow/flow-go/module/mempool/stdmap" 26 "github.com/onflow/flow-go/module/trace" 27 "github.com/onflow/flow-go/network" 28 psEvents "github.com/onflow/flow-go/state/protocol/events" 29 "github.com/onflow/flow-go/storage" 30 "github.com/onflow/flow-go/utils/logging" 31 ) 32 33 // An Engine receives and saves incoming blocks. 34 type Engine struct { 35 psEvents.Noop // satisfy protocol events consumer interface 36 37 unit *engine.Unit 38 log zerolog.Logger 39 collectionFetcher CollectionFetcher 40 headers storage.Headers // see comments on getHeaderByHeight for why we need it 41 blocks storage.Blocks 42 collections storage.Collections 43 computationManager computation.ComputationManager 44 providerEngine provider.ProviderEngine 45 mempool *Mempool 46 execState state.ExecutionState 47 metrics module.ExecutionMetrics 48 tracer module.Tracer 49 extensiveLogging bool 50 executionDataPruner *pruner.Pruner 51 uploader *uploader.Manager 52 stopControl *stop.StopControl 53 loader BlockLoader 54 } 55 56 func New( 57 unit *engine.Unit, 58 logger zerolog.Logger, 59 net network.EngineRegistry, 60 collectionFetcher CollectionFetcher, 61 headers storage.Headers, 62 blocks storage.Blocks, 63 collections storage.Collections, 64 executionEngine computation.ComputationManager, 65 providerEngine provider.ProviderEngine, 66 execState state.ExecutionState, 67 metrics module.ExecutionMetrics, 68 tracer module.Tracer, 69 extLog bool, 70 pruner *pruner.Pruner, 71 uploader *uploader.Manager, 72 stopControl *stop.StopControl, 73 loader BlockLoader, 74 ) (*Engine, error) { 75 log := logger.With().Str("engine", "ingestion").Logger() 76 77 mempool := newMempool() 78 79 eng := Engine{ 80 unit: unit, 81 log: log, 82 collectionFetcher: collectionFetcher, 83 headers: headers, 84 blocks: blocks, 85 collections: collections, 86 computationManager: executionEngine, 87 providerEngine: providerEngine, 88 mempool: mempool, 89 execState: execState, 90 metrics: metrics, 91 tracer: tracer, 92 extensiveLogging: extLog, 93 executionDataPruner: pruner, 94 uploader: uploader, 95 stopControl: stopControl, 96 loader: loader, 97 } 98 99 return &eng, nil 100 } 101 102 // Ready returns a channel that will close when the engine has 103 // successfully started. 104 func (e *Engine) Ready() <-chan struct{} { 105 if e.stopControl.IsExecutionStopped() { 106 return e.unit.Ready() 107 } 108 109 if err := e.uploader.RetryUploads(); err != nil { 110 e.log.Warn().Msg("failed to re-upload all ComputationResults") 111 } 112 113 err := e.reloadUnexecutedBlocks() 114 if err != nil { 115 e.log.Fatal().Err(err).Msg("failed to load all unexecuted blocks") 116 } 117 118 return e.unit.Ready() 119 } 120 121 // Done returns a channel that will close when the engine has 122 // successfully stopped. 123 func (e *Engine) Done() <-chan struct{} { 124 return e.unit.Done() 125 } 126 127 // ProcessLocal processes an event originating on the local node. 128 func (e *Engine) ProcessLocal(event interface{}) error { 129 return fmt.Errorf("ingestion error does not process local events") 130 } 131 132 // on nodes startup, we need to load all the unexecuted blocks to the execution queues. 133 // blocks have to be loaded in the way that the parent has been loaded before loading its children 134 func (e *Engine) reloadUnexecutedBlocks() error { 135 unexecuted, err := e.loader.LoadUnexecuted(e.unit.Ctx()) 136 if err != nil { 137 return fmt.Errorf("could not load unexecuted blocks: %w", err) 138 } 139 // it's possible the BlockProcessable is called during the reloading, as the follower engine 140 // will receive blocks before ingestion engine is ready. 141 // The problem with that is, since the reloading hasn't finished yet, enqueuing the new block from 142 // the BlockProcessable callback will fail, because its parent block might have not been reloaded 143 // to the queues yet. 144 // So one solution here is to lock the execution queues during reloading, so that if BlockProcessable 145 // is called before reloading is finished, it will be blocked, which will avoid that edge case. 146 return e.mempool.Run(func( 147 blockByCollection *stdmap.BlockByCollectionBackdata, 148 executionQueues *stdmap.QueuesBackdata, 149 ) error { 150 for _, blockID := range unexecuted { 151 err := e.reloadBlock(blockByCollection, executionQueues, blockID) 152 if err != nil { 153 return fmt.Errorf("could not reload block: %v, %w", blockID, err) 154 } 155 156 e.log.Debug().Hex("block_id", blockID[:]).Msg("reloaded block") 157 } 158 159 e.log.Info().Int("count", len(unexecuted)).Msg("all unexecuted have been successfully reloaded") 160 161 return nil 162 }) 163 } 164 165 func (e *Engine) reloadBlock( 166 blockByCollection *stdmap.BlockByCollectionBackdata, 167 executionQueues *stdmap.QueuesBackdata, 168 blockID flow.Identifier, 169 ) error { 170 block, err := e.blocks.ByID(blockID) 171 if err != nil { 172 return fmt.Errorf("could not get block by ID: %v %w", blockID, err) 173 } 174 175 // enqueue the block and check if there is any missing collections 176 missingCollections, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false) 177 178 if err != nil { 179 return fmt.Errorf("could not enqueue block %x on reloading: %w", blockID, err) 180 } 181 182 // forward the missing collections to requester engine for requesting them from collection nodes, 183 // adding the missing collections to mempool in order to trigger the block execution as soon as 184 // all missing collections are received. 185 err = e.fetchAndHandleCollection(blockID, block.Header.Height, missingCollections, func(collection *flow.Collection) error { 186 err := e.addCollectionToMempool(collection, blockByCollection) 187 188 if err != nil { 189 return fmt.Errorf("could not add collection to mempool: %w", err) 190 } 191 return nil 192 }) 193 194 if err != nil { 195 return fmt.Errorf("could not fetch or handle collection %w", err) 196 } 197 return nil 198 } 199 200 // BlockProcessable handles the new verified blocks (blocks that 201 // have passed consensus validation) received from the consensus nodes 202 // NOTE: BlockProcessable might be called multiple times for the same block. 203 // NOTE: Ready calls reloadUnexecutedBlocks during initialization, which handles dropped protocol events. 204 func (e *Engine) BlockProcessable(b *flow.Header, _ *flow.QuorumCertificate) { 205 206 // TODO: this should not be blocking: https://github.com/onflow/flow-go/issues/4400 207 208 // skip if stopControl tells to skip, so that we can avoid fetching collections 209 // for this block 210 if !e.stopControl.ShouldExecuteBlock(b.ID(), b.Height) { 211 return 212 } 213 214 blockID := b.ID() 215 newBlock, err := e.blocks.ByID(blockID) 216 if err != nil { 217 e.log.Fatal().Err(err).Msgf("could not get incorporated block(%v): %v", blockID, err) 218 } 219 220 e.log.Info().Hex("block_id", blockID[:]). 221 Uint64("height", b.Height). 222 Msg("handling new block") 223 224 err = e.handleBlock(e.unit.Ctx(), newBlock) 225 if err != nil { 226 e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to handle block") 227 } 228 } 229 230 // Main handling 231 232 // handle block will process the incoming block. 233 // the block has passed the consensus validation. 234 func (e *Engine) handleBlock(ctx context.Context, block *flow.Block) error { 235 236 blockID := block.ID() 237 log := e.log.With().Hex("block_id", blockID[:]).Logger() 238 239 span, _ := e.tracer.StartBlockSpan(ctx, blockID, trace.EXEHandleBlock) 240 defer span.End() 241 242 executed, err := e.execState.IsBlockExecuted(block.Header.Height, blockID) 243 if err != nil { 244 return fmt.Errorf("could not check whether block is executed: %w", err) 245 } 246 247 if executed { 248 log.Debug().Msg("block has been executed already") 249 return nil 250 } 251 252 var missingCollections []*flow.CollectionGuarantee 253 // unexecuted block 254 // acquiring the lock so that there is only one process modifying the queue 255 err = e.mempool.Run(func( 256 blockByCollection *stdmap.BlockByCollectionBackdata, 257 executionQueues *stdmap.QueuesBackdata, 258 ) error { 259 missing, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false) 260 if err != nil { 261 return err 262 } 263 missingCollections = missing 264 return nil 265 }) 266 267 if err != nil { 268 return fmt.Errorf("could not enqueue block %v: %w", blockID, err) 269 } 270 271 return e.addOrFetch(blockID, block.Header.Height, missingCollections) 272 } 273 274 func (e *Engine) enqueueBlockAndCheckExecutable( 275 blockByCollection *stdmap.BlockByCollectionBackdata, 276 executionQueues *stdmap.QueuesBackdata, 277 block *flow.Block, 278 checkStateSync bool, 279 ) ([]*flow.CollectionGuarantee, error) { 280 executableBlock := &entity.ExecutableBlock{ 281 Block: block, 282 CompleteCollections: make(map[flow.Identifier]*entity.CompleteCollection), 283 } 284 285 blockID := executableBlock.ID() 286 287 lg := e.log.With(). 288 Hex("block_id", blockID[:]). 289 Uint64("block_height", executableBlock.Block.Header.Height). 290 Logger() 291 292 // adding the block to the queue, 293 queue, added, head := enqueue(executableBlock, executionQueues) 294 295 // if it's not added, it means the block is not a new block, it already 296 // exists in the queue, then bail 297 if !added { 298 log.Debug().Hex("block_id", logging.Entity(executableBlock)). 299 Int("block_height", int(executableBlock.Height())). 300 Msg("block already exists in the execution queue") 301 return nil, nil 302 } 303 304 firstUnexecutedHeight := queue.Head.Item.Height() 305 306 // check if a block is executable. 307 // a block is executable if the following conditions are all true 308 // 1) the parent state commitment is ready 309 // 2) the collections for the block payload are ready 310 // 3) the child block is ready for querying the randomness 311 312 // check if the block's parent has been executed. (we can't execute the block if the parent has 313 // not been executed yet) 314 // check if there is a statecommitment for the parent block 315 parentCommitment, err := e.execState.StateCommitmentByBlockID(block.Header.ParentID) 316 317 // if we found the statecommitment for the parent block, then add it to the executable block. 318 if err == nil { 319 executableBlock.StartState = &parentCommitment 320 } else if errors.Is(err, storage.ErrNotFound) { 321 // the parent block is an unexecuted block. 322 // if the queue only has one block, and its parent doesn't 323 // exist in the queue, then we need to load the block from the storage. 324 _, ok := queue.Nodes[blockID] 325 if !ok { 326 lg.Error().Msgf("an unexecuted parent block is missing in the queue") 327 } 328 } else { 329 // if there is exception, then crash 330 lg.Fatal().Err(err).Msg("unexpected error while accessing storage, shutting down") 331 } 332 333 // check if we have all the collections for the block, and request them if there is missing. 334 missingCollections, err := e.matchAndFindMissingCollections(executableBlock, blockByCollection) 335 if err != nil { 336 return nil, fmt.Errorf("cannot send collection requests: %w", err) 337 } 338 339 complete := false 340 341 // if newly enqueued block is inside any existing queue, we should skip now and wait 342 // for parent to finish execution 343 if head { 344 // execute the block if the block is ready to be executed 345 complete = e.executeBlockIfComplete(executableBlock) 346 } 347 348 lg.Info(). 349 // if the execution is halt, but the queue keeps growing, we could check which block 350 // hasn't been executed. 351 Uint64("first_unexecuted_in_queue", firstUnexecutedHeight). 352 Bool("complete", complete). 353 Bool("head_of_queue", head). 354 Int("cols", len(executableBlock.Block.Payload.Guarantees)). 355 Int("missing_cols", len(missingCollections)). 356 Msg("block is enqueued") 357 358 return missingCollections, nil 359 } 360 361 // executeBlock will execute the block. 362 // When finish executing, it will check if the children becomes executable and execute them if yes. 363 func (e *Engine) executeBlock( 364 ctx context.Context, 365 executableBlock *entity.ExecutableBlock, 366 ) { 367 368 // don't execute the block if the stop control says no 369 if !e.stopControl.ShouldExecuteBlock(executableBlock.Block.Header.ID(), executableBlock.Block.Header.Height) { 370 return 371 } 372 373 lg := e.log.With(). 374 Hex("block_id", logging.Entity(executableBlock)). 375 Uint64("height", executableBlock.Block.Header.Height). 376 Int("collections", len(executableBlock.CompleteCollections)). 377 Logger() 378 379 lg.Info().Msg("executing block") 380 381 startedAt := time.Now() 382 383 span, ctx := e.tracer.StartSpanFromContext(ctx, trace.EXEExecuteBlock) 384 defer span.End() 385 386 parentID := executableBlock.Block.Header.ParentID 387 parentErID, err := e.execState.GetExecutionResultID(ctx, parentID) 388 if err != nil { 389 lg.Err(err). 390 Str("parentID", parentID.String()). 391 Msg("could not get execution result ID for parent block") 392 return 393 } 394 395 snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState, 396 executableBlock.Block.Header.ParentID, 397 executableBlock.Block.Header.Height-1, 398 ) 399 400 computationResult, err := e.computationManager.ComputeBlock( 401 ctx, 402 parentErID, 403 executableBlock, 404 snapshot) 405 if err != nil { 406 lg.Err(err).Msg("error while computing block") 407 return 408 } 409 410 wg := sync.WaitGroup{} 411 wg.Add(1) 412 defer wg.Wait() 413 414 go func() { 415 defer wg.Done() 416 err := e.uploader.Upload(ctx, computationResult) 417 if err != nil { 418 lg.Err(err).Msg("error while uploading block") 419 // continue processing. uploads should not block execution 420 } 421 }() 422 423 err = e.saveExecutionResults(ctx, computationResult) 424 if errors.Is(err, storage.ErrDataMismatch) { 425 lg.Fatal().Err(err).Msg("fatal: trying to store different results for the same block") 426 } 427 428 if err != nil { 429 lg.Err(err).Msg("error while handing computation results") 430 return 431 } 432 433 receipt := computationResult.ExecutionReceipt 434 broadcasted, err := e.providerEngine.BroadcastExecutionReceipt( 435 ctx, executableBlock.Block.Header.Height, receipt) 436 if err != nil { 437 lg.Err(err).Msg("critical: failed to broadcast the receipt") 438 } 439 440 finalEndState := computationResult.CurrentEndState() 441 lg.Info(). 442 Hex("parent_block", executableBlock.Block.Header.ParentID[:]). 443 Int("collections", len(executableBlock.Block.Payload.Guarantees)). 444 Hex("start_state", executableBlock.StartState[:]). 445 Hex("final_state", finalEndState[:]). 446 Hex("receipt_id", logging.Entity(receipt)). 447 Hex("result_id", logging.Entity(receipt.ExecutionResult)). 448 Hex("execution_data_id", receipt.ExecutionResult.ExecutionDataID[:]). 449 Bool("state_changed", finalEndState != *executableBlock.StartState). 450 Uint64("num_txs", nonSystemTransactionCount(receipt.ExecutionResult)). 451 Bool("broadcasted", broadcasted). 452 Int64("timeSpentInMS", time.Since(startedAt).Milliseconds()). 453 Msg("block executed") 454 455 e.stopControl.OnBlockExecuted(executableBlock.Block.Header) 456 457 err = e.onBlockExecuted(executableBlock, finalEndState) 458 if err != nil { 459 lg.Err(err).Msg("failed in process block's children") 460 } 461 462 if e.executionDataPruner != nil { 463 e.executionDataPruner.NotifyFulfilledHeight(executableBlock.Height()) 464 } 465 466 e.unit.Ctx() 467 468 } 469 470 func nonSystemTransactionCount(result flow.ExecutionResult) uint64 { 471 count := uint64(0) 472 for _, chunk := range result.Chunks { 473 count += chunk.NumberOfTransactions 474 } 475 return count 476 } 477 478 // we've executed the block, now we need to check: 479 // 1. whether the state syncing can be turned off 480 // 2. whether its children can be executed 481 // the executionQueues stores blocks as a tree: 482 // 483 // 10 <- 11 <- 12 484 // ^-- 13 485 // 14 <- 15 <- 16 486 // 487 // if block 10 is the one just executed, then we will remove it from the queue, and add 488 // its children back, meaning the tree will become: 489 // 490 // 11 <- 12 491 // 13 492 // 14 <- 15 <- 16 493 494 func (e *Engine) onBlockExecuted( 495 executed *entity.ExecutableBlock, 496 finalState flow.StateCommitment, 497 ) error { 498 499 e.metrics.ExecutionStorageStateCommitment(int64(len(finalState))) 500 e.metrics.ExecutionLastExecutedBlockHeight(executed.Block.Header.Height) 501 502 missingCollections := make(map[*entity.ExecutableBlock][]*flow.CollectionGuarantee) 503 err := e.mempool.Run( 504 func( 505 blockByCollection *stdmap.BlockByCollectionBackdata, 506 executionQueues *stdmap.QueuesBackdata, 507 ) error { 508 // find the block that was just executed 509 executionQueue, exists := executionQueues.ByID(executed.ID()) 510 if !exists { 511 logQueueState(e.log, executionQueues, executed.ID()) 512 // when the block no longer exists in the queue, it means there was a race condition that 513 // two onBlockExecuted was called for the same block, and one process has already removed the 514 // block from the queue, so we will print an error here 515 return fmt.Errorf("block has been executed already, no longer exists in the queue") 516 } 517 518 // dismount the executed block and all its children 519 _, newQueues := executionQueue.Dismount() 520 521 // go through each children, add them back to the queue, and check 522 // if the children is executable 523 for _, queue := range newQueues { 524 queueID := queue.ID() 525 added := executionQueues.Add(queueID, queue) 526 if !added { 527 // blocks should be unique in execution queues, if we dismount all the children blocks, then 528 // add it back to the queues, then it should always be able to add. 529 // If not, then there is a bug that the queues have duplicated blocks 530 return fmt.Errorf("fatal error - child block already in execution queue") 531 } 532 533 // the parent block has been executed, update the StartState of 534 // each child block. 535 child := queue.Head.Item.(*entity.ExecutableBlock) 536 child.StartState = &finalState 537 538 missing, err := e.matchAndFindMissingCollections(child, blockByCollection) 539 if err != nil { 540 return fmt.Errorf("cannot send collection requests: %w", err) 541 } 542 if len(missing) > 0 { 543 missingCollections[child] = append(missingCollections[child], missing...) 544 } 545 546 completed := e.executeBlockIfComplete(child) 547 if !completed { 548 e.log.Debug(). 549 Hex("executed_block", logging.Entity(executed)). 550 Hex("child_block", logging.Entity(child)). 551 Msg("child block is not ready to be executed yet") 552 } else { 553 e.log.Debug(). 554 Hex("executed_block", logging.Entity(executed)). 555 Hex("child_block", logging.Entity(child)). 556 Msg("child block is ready to be executed") 557 } 558 } 559 560 // remove the executed block 561 executionQueues.Remove(executed.ID()) 562 563 return nil 564 }) 565 566 if err != nil { 567 e.log.Fatal().Err(err). 568 Hex("block", logging.Entity(executed)). 569 Uint64("height", executed.Block.Header.Height). 570 Msg("error while requeueing blocks after execution") 571 } 572 573 for child, missing := range missingCollections { 574 err := e.addOrFetch(child.ID(), child.Block.Header.Height, missing) 575 if err != nil { 576 return fmt.Errorf("fail to add missing collections: %w", err) 577 } 578 } 579 580 return nil 581 } 582 583 // executeBlockIfComplete checks whether the block is ready to be executed. 584 // if yes, execute the block 585 // return a bool indicates whether the block was completed 586 func (e *Engine) executeBlockIfComplete(eb *entity.ExecutableBlock) bool { 587 588 if eb.Executing { 589 return false 590 } 591 592 // if don't have the delta, then check if everything is ready for executing 593 // the block 594 if eb.IsComplete() { 595 596 if e.extensiveLogging { 597 e.logExecutableBlock(eb) 598 } 599 600 // no external synchronisation is used because this method must be run in a thread-safe context 601 eb.Executing = true 602 603 e.unit.Launch(func() { 604 e.executeBlock(e.unit.Ctx(), eb) 605 }) 606 return true 607 } 608 return false 609 } 610 611 // OnCollection is a callback for handling the collections requested by the 612 // collection requester. 613 func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) { 614 // convert entity to strongly typed collection 615 collection, ok := entity.(*flow.Collection) 616 if !ok { 617 e.log.Error().Msgf("invalid entity type (%T)", entity) 618 return 619 } 620 621 // no need to validate the origin ID, since the collection requester has 622 // checked the origin must be a collection node. 623 624 err := e.handleCollection(originID, collection) 625 if err != nil { 626 e.log.Error().Err(err).Msg("could not handle collection") 627 } 628 } 629 630 // a block can't be executed if its collection is missing. 631 // since a collection can belong to multiple blocks, we need to 632 // find all the blocks that are needing this collection, and then 633 // check if any of these block becomes executable and execute it if 634 // is. 635 func (e *Engine) handleCollection( 636 originID flow.Identifier, 637 collection *flow.Collection, 638 ) error { 639 collID := collection.ID() 640 641 span, _ := e.tracer.StartCollectionSpan(context.Background(), collID, trace.EXEHandleCollection) 642 defer span.End() 643 644 lg := e.log.With().Hex("collection_id", collID[:]).Logger() 645 646 lg.Info().Hex("sender", originID[:]).Int("len", collection.Len()).Msg("handle collection") 647 defer func(startTime time.Time) { 648 lg.Info().TimeDiff("duration", time.Now(), startTime).Msg("collection handled") 649 }(time.Now()) 650 651 // TODO: bail if have seen this collection before. 652 err := e.collections.Store(collection) 653 if err != nil { 654 return fmt.Errorf("cannot store collection: %w", err) 655 } 656 657 return e.mempool.BlockByCollection.Run( 658 func(backdata *stdmap.BlockByCollectionBackdata) error { 659 return e.addCollectionToMempool(collection, backdata) 660 }, 661 ) 662 } 663 664 func (e *Engine) addCollectionToMempool( 665 collection *flow.Collection, 666 backdata *stdmap.BlockByCollectionBackdata, 667 ) error { 668 collID := collection.ID() 669 blockByCollectionID, exists := backdata.ByID(collID) 670 671 // if we don't find any block for this collection, then 672 // means we don't need this collection any more. 673 // or it was ejected from the mempool when it was full. 674 // either way, we will return 675 if !exists { 676 return nil 677 } 678 679 for _, executableBlock := range blockByCollectionID.ExecutableBlocks { 680 blockID := executableBlock.ID() 681 682 completeCollection, ok := executableBlock.CompleteCollections[collID] 683 if !ok { 684 return fmt.Errorf("cannot handle collection: internal inconsistency - collection pointing to block %v which does not contain said collection", 685 blockID) 686 } 687 688 e.metrics.UpdateCollectionMaxHeight(executableBlock.Block.Header.Height) 689 690 if completeCollection.IsCompleted() { 691 // already received transactions for this collection 692 continue 693 } 694 695 // update the transactions of the collection 696 // Note: it's guaranteed the transactions are for this collection, because 697 // the collection id matches with the CollectionID from the collection guarantee 698 completeCollection.Transactions = collection.Transactions 699 700 // check if the block becomes executable 701 _ = e.executeBlockIfComplete(executableBlock) 702 } 703 704 // since we've received this collection, remove it from the index 705 // this also prevents from executing the same block twice, because the second 706 // time when the collection arrives, it will not be found in the blockByCollectionID 707 // index. 708 backdata.Remove(collID) 709 710 return nil 711 } 712 713 func newQueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) ( 714 *queue.Queue, 715 bool, 716 ) { 717 q := queue.NewQueue(blockify) 718 qID := q.ID() 719 return q, queues.Add(qID, q) 720 } 721 722 // enqueue adds a block to the queues, return the queue that includes the block and booleans 723 // * is block new one (it's not already enqueued, not a duplicate) 724 // * is head of the queue (new queue has been created) 725 // 726 // Queues are chained blocks. Since a block can't be executable until its parent has been 727 // executed, the chained structure allows us to only check the head of each queue to see if 728 // any block becomes executable. 729 // for instance we have one queue whose head is A: 730 // 731 // A <- B <- C 732 // ^- D <- E 733 // 734 // If we receive E <- F, then we will add it to the queue: 735 // 736 // A <- B <- C 737 // ^- D <- E <- F 738 // 739 // Even through there are 6 blocks, we only need to check if block A becomes executable. 740 // when the parent block isn't in the queue, we add it as a new queue. for instance, if 741 // we receive H <- G, then the queues will become: 742 // 743 // A <- B <- C 744 // ^- D <- E 745 // G 746 func enqueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) ( 747 *queue.Queue, 748 bool, 749 bool, 750 ) { 751 for _, queue := range queues.All() { 752 if stored, isNew := queue.TryAdd(blockify); stored { 753 return queue, isNew, false 754 } 755 } 756 queue, isNew := newQueue(blockify, queues) 757 return queue, isNew, true 758 } 759 760 // check if the block's collections have been received, 761 // if yes, add the collection to the executable block 762 // if no, fetch the collection. 763 // if a block has 3 collection, it would be 3 reqs to fetch them. 764 // mark the collection belongs to the block, 765 // mark the block contains this collection. 766 // It returns the missing collections to be fetched 767 // TODO: to rename 768 func (e *Engine) matchAndFindMissingCollections( 769 executableBlock *entity.ExecutableBlock, 770 collectionsBackdata *stdmap.BlockByCollectionBackdata, 771 ) ([]*flow.CollectionGuarantee, error) { 772 missingCollections := make([]*flow.CollectionGuarantee, 0, len(executableBlock.Block.Payload.Guarantees)) 773 774 for _, guarantee := range executableBlock.Block.Payload.Guarantees { 775 coll := &entity.CompleteCollection{ 776 Guarantee: guarantee, 777 } 778 executableBlock.CompleteCollections[guarantee.ID()] = coll 779 780 // check if we have requested this collection before. 781 // blocksNeedingCollection stores all the blocks that contain this collection 782 783 if blocksNeedingCollection, exists := collectionsBackdata.ByID(guarantee.ID()); exists { 784 // if we've requested this collection, it means other block might also contain this collection. 785 // in this case, add this block to the map so that when the collection is received, 786 // we could update the executable block 787 blocksNeedingCollection.ExecutableBlocks[executableBlock.ID()] = executableBlock 788 789 // since the collection is still being requested, we don't have the transactions 790 // yet, so exit 791 continue 792 } 793 794 // the storage doesn't have this collection, meaning this is our first time seeing this 795 // collection guarantee, create an entry to store in collectionsBackdata in order to 796 // update the executable blocks when the collection is received. 797 blocksNeedingCollection := &entity.BlocksByCollection{ 798 CollectionID: guarantee.ID(), 799 ExecutableBlocks: map[flow.Identifier]*entity.ExecutableBlock{executableBlock.ID(): executableBlock}, 800 } 801 802 added := collectionsBackdata.Add(blocksNeedingCollection.ID(), blocksNeedingCollection) 803 if !added { 804 // sanity check, should not happen, unless mempool implementation has a bug 805 return nil, fmt.Errorf("collection already mapped to block") 806 } 807 808 missingCollections = append(missingCollections, guarantee) 809 } 810 811 return missingCollections, nil 812 } 813 814 // save the execution result of a block 815 func (e *Engine) saveExecutionResults( 816 ctx context.Context, 817 result *execution.ComputationResult, 818 ) error { 819 span, childCtx := e.tracer.StartSpanFromContext(ctx, trace.EXESaveExecutionResults) 820 defer span.End() 821 822 e.log.Debug(). 823 Hex("block_id", logging.Entity(result.ExecutableBlock)). 824 Msg("received computation result") 825 826 for _, event := range result.ExecutionResult.ServiceEvents { 827 e.log.Info(). 828 Uint64("block_height", result.ExecutableBlock.Height()). 829 Hex("block_id", logging.Entity(result.ExecutableBlock)). 830 Str("event_type", event.Type.String()). 831 Msg("service event emitted") 832 } 833 834 err := e.execState.SaveExecutionResults(childCtx, result) 835 if err != nil { 836 return fmt.Errorf("cannot persist execution state: %w", err) 837 } 838 839 finalEndState := result.CurrentEndState() 840 e.log.Debug(). 841 Hex("block_id", logging.Entity(result.ExecutableBlock)). 842 Hex("start_state", result.ExecutableBlock.StartState[:]). 843 Hex("final_state", finalEndState[:]). 844 Msg("saved computation results") 845 846 return nil 847 } 848 849 // logExecutableBlock logs all data about an executable block 850 // over time we should skip this 851 func (e *Engine) logExecutableBlock(eb *entity.ExecutableBlock) { 852 // log block 853 e.log.Debug(). 854 Hex("block_id", logging.Entity(eb)). 855 Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)). 856 Uint64("block_height", eb.Block.Header.Height). 857 Int("number_of_collections", len(eb.Collections())). 858 RawJSON("block_header", logging.AsJSON(eb.Block.Header)). 859 Msg("extensive log: block header") 860 861 // logs transactions 862 for i, col := range eb.Collections() { 863 for j, tx := range col.Transactions { 864 e.log.Debug(). 865 Hex("block_id", logging.Entity(eb)). 866 Int("block_height", int(eb.Block.Header.Height)). 867 Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)). 868 Int("collection_index", i). 869 Int("tx_index", j). 870 Hex("collection_id", logging.ID(col.Guarantee.CollectionID)). 871 Hex("tx_hash", logging.Entity(tx)). 872 Hex("start_state_commitment", eb.StartState[:]). 873 RawJSON("transaction", logging.AsJSON(tx)). 874 Msg("extensive log: executed tx content") 875 } 876 } 877 } 878 879 // addOrFetch checks if there are stored collections for the given guarantees, if there is, 880 // forward them to mempool to process the collection, otherwise fetch the collections. 881 // any error returned are exception 882 func (e *Engine) addOrFetch( 883 blockID flow.Identifier, 884 height uint64, 885 guarantees []*flow.CollectionGuarantee, 886 ) error { 887 return e.fetchAndHandleCollection(blockID, height, guarantees, func(collection *flow.Collection) error { 888 err := e.mempool.BlockByCollection.Run( 889 func(backdata *stdmap.BlockByCollectionBackdata) error { 890 return e.addCollectionToMempool(collection, backdata) 891 }) 892 893 if err != nil { 894 return fmt.Errorf("could not add collection to mempool: %w", err) 895 } 896 return nil 897 }) 898 } 899 900 // addOrFetch checks if there are stored collections for the given guarantees, if there is, 901 // forward them to the handler to process the collection, otherwise fetch the collections. 902 // any error returned are exception 903 func (e *Engine) fetchAndHandleCollection( 904 blockID flow.Identifier, 905 height uint64, 906 guarantees []*flow.CollectionGuarantee, 907 handleCollection func(*flow.Collection) error, 908 ) error { 909 fetched := false 910 for _, guarantee := range guarantees { 911 // if we've requested this collection, we will store it in the storage, 912 // so check the storage to see whether we've seen it. 913 collection, err := e.collections.ByID(guarantee.CollectionID) 914 915 if err == nil { 916 // we found the collection from storage, forward this collection to handler 917 err = handleCollection(collection) 918 if err != nil { 919 return fmt.Errorf("could not handle collection: %w", err) 920 } 921 922 continue 923 } 924 925 // check if there was exception 926 if !errors.Is(err, storage.ErrNotFound) { 927 return fmt.Errorf("error while querying for collection: %w", err) 928 } 929 930 err = e.collectionFetcher.FetchCollection(blockID, height, guarantee) 931 if err != nil { 932 return fmt.Errorf("could not fetch collection: %w", err) 933 } 934 fetched = true 935 } 936 937 // make sure that the requests are dispatched immediately by the requester 938 if fetched { 939 e.collectionFetcher.Force() 940 e.metrics.ExecutionCollectionRequestSent() 941 } 942 943 return nil 944 } 945 946 func logQueueState(log zerolog.Logger, queues *stdmap.QueuesBackdata, blockID flow.Identifier) { 947 all := queues.All() 948 949 log.With().Hex("queue_state__executed_block_id", blockID[:]).Int("count", len(all)).Logger() 950 for i, queue := range all { 951 log.Error().Msgf("%v-th queue state: %v", i, queue.String()) 952 } 953 }