github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/execution/ingestion/engine.go (about)

     1  package ingestion
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/rs/zerolog"
    11  	"github.com/rs/zerolog/log"
    12  
    13  	"github.com/onflow/flow-go/engine"
    14  	"github.com/onflow/flow-go/engine/execution"
    15  	"github.com/onflow/flow-go/engine/execution/computation"
    16  	"github.com/onflow/flow-go/engine/execution/ingestion/stop"
    17  	"github.com/onflow/flow-go/engine/execution/ingestion/uploader"
    18  	"github.com/onflow/flow-go/engine/execution/provider"
    19  	"github.com/onflow/flow-go/engine/execution/state"
    20  	"github.com/onflow/flow-go/model/flow"
    21  	"github.com/onflow/flow-go/module"
    22  	"github.com/onflow/flow-go/module/executiondatasync/pruner"
    23  	"github.com/onflow/flow-go/module/mempool/entity"
    24  	"github.com/onflow/flow-go/module/mempool/queue"
    25  	"github.com/onflow/flow-go/module/mempool/stdmap"
    26  	"github.com/onflow/flow-go/module/trace"
    27  	"github.com/onflow/flow-go/network"
    28  	psEvents "github.com/onflow/flow-go/state/protocol/events"
    29  	"github.com/onflow/flow-go/storage"
    30  	"github.com/onflow/flow-go/utils/logging"
    31  )
    32  
    33  // An Engine receives and saves incoming blocks.
    34  type Engine struct {
    35  	psEvents.Noop // satisfy protocol events consumer interface
    36  
    37  	unit                *engine.Unit
    38  	log                 zerolog.Logger
    39  	collectionFetcher   CollectionFetcher
    40  	headers             storage.Headers // see comments on getHeaderByHeight for why we need it
    41  	blocks              storage.Blocks
    42  	collections         storage.Collections
    43  	computationManager  computation.ComputationManager
    44  	providerEngine      provider.ProviderEngine
    45  	mempool             *Mempool
    46  	execState           state.ExecutionState
    47  	metrics             module.ExecutionMetrics
    48  	tracer              module.Tracer
    49  	extensiveLogging    bool
    50  	executionDataPruner *pruner.Pruner
    51  	uploader            *uploader.Manager
    52  	stopControl         *stop.StopControl
    53  	loader              BlockLoader
    54  }
    55  
    56  func New(
    57  	unit *engine.Unit,
    58  	logger zerolog.Logger,
    59  	net network.EngineRegistry,
    60  	collectionFetcher CollectionFetcher,
    61  	headers storage.Headers,
    62  	blocks storage.Blocks,
    63  	collections storage.Collections,
    64  	executionEngine computation.ComputationManager,
    65  	providerEngine provider.ProviderEngine,
    66  	execState state.ExecutionState,
    67  	metrics module.ExecutionMetrics,
    68  	tracer module.Tracer,
    69  	extLog bool,
    70  	pruner *pruner.Pruner,
    71  	uploader *uploader.Manager,
    72  	stopControl *stop.StopControl,
    73  	loader BlockLoader,
    74  ) (*Engine, error) {
    75  	log := logger.With().Str("engine", "ingestion").Logger()
    76  
    77  	mempool := newMempool()
    78  
    79  	eng := Engine{
    80  		unit:                unit,
    81  		log:                 log,
    82  		collectionFetcher:   collectionFetcher,
    83  		headers:             headers,
    84  		blocks:              blocks,
    85  		collections:         collections,
    86  		computationManager:  executionEngine,
    87  		providerEngine:      providerEngine,
    88  		mempool:             mempool,
    89  		execState:           execState,
    90  		metrics:             metrics,
    91  		tracer:              tracer,
    92  		extensiveLogging:    extLog,
    93  		executionDataPruner: pruner,
    94  		uploader:            uploader,
    95  		stopControl:         stopControl,
    96  		loader:              loader,
    97  	}
    98  
    99  	return &eng, nil
   100  }
   101  
   102  // Ready returns a channel that will close when the engine has
   103  // successfully started.
   104  func (e *Engine) Ready() <-chan struct{} {
   105  	if e.stopControl.IsExecutionStopped() {
   106  		return e.unit.Ready()
   107  	}
   108  
   109  	if err := e.uploader.RetryUploads(); err != nil {
   110  		e.log.Warn().Msg("failed to re-upload all ComputationResults")
   111  	}
   112  
   113  	err := e.reloadUnexecutedBlocks()
   114  	if err != nil {
   115  		e.log.Fatal().Err(err).Msg("failed to load all unexecuted blocks")
   116  	}
   117  
   118  	return e.unit.Ready()
   119  }
   120  
   121  // Done returns a channel that will close when the engine has
   122  // successfully stopped.
   123  func (e *Engine) Done() <-chan struct{} {
   124  	return e.unit.Done()
   125  }
   126  
   127  // ProcessLocal processes an event originating on the local node.
   128  func (e *Engine) ProcessLocal(event interface{}) error {
   129  	return fmt.Errorf("ingestion error does not process local events")
   130  }
   131  
   132  // on nodes startup, we need to load all the unexecuted blocks to the execution queues.
   133  // blocks have to be loaded in the way that the parent has been loaded before loading its children
   134  func (e *Engine) reloadUnexecutedBlocks() error {
   135  	unexecuted, err := e.loader.LoadUnexecuted(e.unit.Ctx())
   136  	if err != nil {
   137  		return fmt.Errorf("could not load unexecuted blocks: %w", err)
   138  	}
   139  	// it's possible the BlockProcessable is called during the reloading, as the follower engine
   140  	// will receive blocks before ingestion engine is ready.
   141  	// The problem with that is, since the reloading hasn't finished yet, enqueuing the new block from
   142  	// the BlockProcessable callback will fail, because its parent block might have not been reloaded
   143  	// to the queues yet.
   144  	// So one solution here is to lock the execution queues during reloading, so that if BlockProcessable
   145  	// is called before reloading is finished, it will be blocked, which will avoid that edge case.
   146  	return e.mempool.Run(func(
   147  		blockByCollection *stdmap.BlockByCollectionBackdata,
   148  		executionQueues *stdmap.QueuesBackdata,
   149  	) error {
   150  		for _, blockID := range unexecuted {
   151  			err := e.reloadBlock(blockByCollection, executionQueues, blockID)
   152  			if err != nil {
   153  				return fmt.Errorf("could not reload block: %v, %w", blockID, err)
   154  			}
   155  
   156  			e.log.Debug().Hex("block_id", blockID[:]).Msg("reloaded block")
   157  		}
   158  
   159  		e.log.Info().Int("count", len(unexecuted)).Msg("all unexecuted have been successfully reloaded")
   160  
   161  		return nil
   162  	})
   163  }
   164  
   165  func (e *Engine) reloadBlock(
   166  	blockByCollection *stdmap.BlockByCollectionBackdata,
   167  	executionQueues *stdmap.QueuesBackdata,
   168  	blockID flow.Identifier,
   169  ) error {
   170  	block, err := e.blocks.ByID(blockID)
   171  	if err != nil {
   172  		return fmt.Errorf("could not get block by ID: %v %w", blockID, err)
   173  	}
   174  
   175  	// enqueue the block and check if there is any missing collections
   176  	missingCollections, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false)
   177  
   178  	if err != nil {
   179  		return fmt.Errorf("could not enqueue block %x on reloading: %w", blockID, err)
   180  	}
   181  
   182  	// forward the missing collections to requester engine for requesting them from collection nodes,
   183  	// adding the missing collections to mempool in order to trigger the block execution as soon as
   184  	// all missing collections are received.
   185  	err = e.fetchAndHandleCollection(blockID, block.Header.Height, missingCollections, func(collection *flow.Collection) error {
   186  		err := e.addCollectionToMempool(collection, blockByCollection)
   187  
   188  		if err != nil {
   189  			return fmt.Errorf("could not add collection to mempool: %w", err)
   190  		}
   191  		return nil
   192  	})
   193  
   194  	if err != nil {
   195  		return fmt.Errorf("could not fetch or handle collection %w", err)
   196  	}
   197  	return nil
   198  }
   199  
   200  // BlockProcessable handles the new verified blocks (blocks that
   201  // have passed consensus validation) received from the consensus nodes
   202  // NOTE: BlockProcessable might be called multiple times for the same block.
   203  // NOTE: Ready calls reloadUnexecutedBlocks during initialization, which handles dropped protocol events.
   204  func (e *Engine) BlockProcessable(b *flow.Header, _ *flow.QuorumCertificate) {
   205  
   206  	// TODO: this should not be blocking: https://github.com/onflow/flow-go/issues/4400
   207  
   208  	// skip if stopControl tells to skip, so that we can avoid fetching collections
   209  	// for this block
   210  	if !e.stopControl.ShouldExecuteBlock(b.ID(), b.Height) {
   211  		return
   212  	}
   213  
   214  	blockID := b.ID()
   215  	newBlock, err := e.blocks.ByID(blockID)
   216  	if err != nil {
   217  		e.log.Fatal().Err(err).Msgf("could not get incorporated block(%v): %v", blockID, err)
   218  	}
   219  
   220  	e.log.Info().Hex("block_id", blockID[:]).
   221  		Uint64("height", b.Height).
   222  		Msg("handling new block")
   223  
   224  	err = e.handleBlock(e.unit.Ctx(), newBlock)
   225  	if err != nil {
   226  		e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to handle block")
   227  	}
   228  }
   229  
   230  // Main handling
   231  
   232  // handle block will process the incoming block.
   233  // the block has passed the consensus validation.
   234  func (e *Engine) handleBlock(ctx context.Context, block *flow.Block) error {
   235  
   236  	blockID := block.ID()
   237  	log := e.log.With().Hex("block_id", blockID[:]).Logger()
   238  
   239  	span, _ := e.tracer.StartBlockSpan(ctx, blockID, trace.EXEHandleBlock)
   240  	defer span.End()
   241  
   242  	executed, err := e.execState.IsBlockExecuted(block.Header.Height, blockID)
   243  	if err != nil {
   244  		return fmt.Errorf("could not check whether block is executed: %w", err)
   245  	}
   246  
   247  	if executed {
   248  		log.Debug().Msg("block has been executed already")
   249  		return nil
   250  	}
   251  
   252  	var missingCollections []*flow.CollectionGuarantee
   253  	// unexecuted block
   254  	// acquiring the lock so that there is only one process modifying the queue
   255  	err = e.mempool.Run(func(
   256  		blockByCollection *stdmap.BlockByCollectionBackdata,
   257  		executionQueues *stdmap.QueuesBackdata,
   258  	) error {
   259  		missing, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false)
   260  		if err != nil {
   261  			return err
   262  		}
   263  		missingCollections = missing
   264  		return nil
   265  	})
   266  
   267  	if err != nil {
   268  		return fmt.Errorf("could not enqueue block %v: %w", blockID, err)
   269  	}
   270  
   271  	return e.addOrFetch(blockID, block.Header.Height, missingCollections)
   272  }
   273  
   274  func (e *Engine) enqueueBlockAndCheckExecutable(
   275  	blockByCollection *stdmap.BlockByCollectionBackdata,
   276  	executionQueues *stdmap.QueuesBackdata,
   277  	block *flow.Block,
   278  	checkStateSync bool,
   279  ) ([]*flow.CollectionGuarantee, error) {
   280  	executableBlock := &entity.ExecutableBlock{
   281  		Block:               block,
   282  		CompleteCollections: make(map[flow.Identifier]*entity.CompleteCollection),
   283  	}
   284  
   285  	blockID := executableBlock.ID()
   286  
   287  	lg := e.log.With().
   288  		Hex("block_id", blockID[:]).
   289  		Uint64("block_height", executableBlock.Block.Header.Height).
   290  		Logger()
   291  
   292  	// adding the block to the queue,
   293  	queue, added, head := enqueue(executableBlock, executionQueues)
   294  
   295  	// if it's not added, it means the block is not a new block, it already
   296  	// exists in the queue, then bail
   297  	if !added {
   298  		log.Debug().Hex("block_id", logging.Entity(executableBlock)).
   299  			Int("block_height", int(executableBlock.Height())).
   300  			Msg("block already exists in the execution queue")
   301  		return nil, nil
   302  	}
   303  
   304  	firstUnexecutedHeight := queue.Head.Item.Height()
   305  
   306  	// check if a block is executable.
   307  	// a block is executable if the following conditions are all true
   308  	// 1) the parent state commitment is ready
   309  	// 2) the collections for the block payload are ready
   310  	// 3) the child block is ready for querying the randomness
   311  
   312  	// check if the block's parent has been executed. (we can't execute the block if the parent has
   313  	// not been executed yet)
   314  	// check if there is a statecommitment for the parent block
   315  	parentCommitment, err := e.execState.StateCommitmentByBlockID(block.Header.ParentID)
   316  
   317  	// if we found the statecommitment for the parent block, then add it to the executable block.
   318  	if err == nil {
   319  		executableBlock.StartState = &parentCommitment
   320  	} else if errors.Is(err, storage.ErrNotFound) {
   321  		// the parent block is an unexecuted block.
   322  		// if the queue only has one block, and its parent doesn't
   323  		// exist in the queue, then we need to load the block from the storage.
   324  		_, ok := queue.Nodes[blockID]
   325  		if !ok {
   326  			lg.Error().Msgf("an unexecuted parent block is missing in the queue")
   327  		}
   328  	} else {
   329  		// if there is exception, then crash
   330  		lg.Fatal().Err(err).Msg("unexpected error while accessing storage, shutting down")
   331  	}
   332  
   333  	// check if we have all the collections for the block, and request them if there is missing.
   334  	missingCollections, err := e.matchAndFindMissingCollections(executableBlock, blockByCollection)
   335  	if err != nil {
   336  		return nil, fmt.Errorf("cannot send collection requests: %w", err)
   337  	}
   338  
   339  	complete := false
   340  
   341  	// if newly enqueued block is inside any existing queue, we should skip now and wait
   342  	// for parent to finish execution
   343  	if head {
   344  		// execute the block if the block is ready to be executed
   345  		complete = e.executeBlockIfComplete(executableBlock)
   346  	}
   347  
   348  	lg.Info().
   349  		// if the execution is halt, but the queue keeps growing, we could check which block
   350  		// hasn't been executed.
   351  		Uint64("first_unexecuted_in_queue", firstUnexecutedHeight).
   352  		Bool("complete", complete).
   353  		Bool("head_of_queue", head).
   354  		Int("cols", len(executableBlock.Block.Payload.Guarantees)).
   355  		Int("missing_cols", len(missingCollections)).
   356  		Msg("block is enqueued")
   357  
   358  	return missingCollections, nil
   359  }
   360  
   361  // executeBlock will execute the block.
   362  // When finish executing, it will check if the children becomes executable and execute them if yes.
   363  func (e *Engine) executeBlock(
   364  	ctx context.Context,
   365  	executableBlock *entity.ExecutableBlock,
   366  ) {
   367  
   368  	// don't execute the block if the stop control says no
   369  	if !e.stopControl.ShouldExecuteBlock(executableBlock.Block.Header.ID(), executableBlock.Block.Header.Height) {
   370  		return
   371  	}
   372  
   373  	lg := e.log.With().
   374  		Hex("block_id", logging.Entity(executableBlock)).
   375  		Uint64("height", executableBlock.Block.Header.Height).
   376  		Int("collections", len(executableBlock.CompleteCollections)).
   377  		Logger()
   378  
   379  	lg.Info().Msg("executing block")
   380  
   381  	startedAt := time.Now()
   382  
   383  	span, ctx := e.tracer.StartSpanFromContext(ctx, trace.EXEExecuteBlock)
   384  	defer span.End()
   385  
   386  	parentID := executableBlock.Block.Header.ParentID
   387  	parentErID, err := e.execState.GetExecutionResultID(ctx, parentID)
   388  	if err != nil {
   389  		lg.Err(err).
   390  			Str("parentID", parentID.String()).
   391  			Msg("could not get execution result ID for parent block")
   392  		return
   393  	}
   394  
   395  	snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState,
   396  		executableBlock.Block.Header.ParentID,
   397  		executableBlock.Block.Header.Height-1,
   398  	)
   399  
   400  	computationResult, err := e.computationManager.ComputeBlock(
   401  		ctx,
   402  		parentErID,
   403  		executableBlock,
   404  		snapshot)
   405  	if err != nil {
   406  		lg.Err(err).Msg("error while computing block")
   407  		return
   408  	}
   409  
   410  	wg := sync.WaitGroup{}
   411  	wg.Add(1)
   412  	defer wg.Wait()
   413  
   414  	go func() {
   415  		defer wg.Done()
   416  		err := e.uploader.Upload(ctx, computationResult)
   417  		if err != nil {
   418  			lg.Err(err).Msg("error while uploading block")
   419  			// continue processing. uploads should not block execution
   420  		}
   421  	}()
   422  
   423  	err = e.saveExecutionResults(ctx, computationResult)
   424  	if errors.Is(err, storage.ErrDataMismatch) {
   425  		lg.Fatal().Err(err).Msg("fatal: trying to store different results for the same block")
   426  	}
   427  
   428  	if err != nil {
   429  		lg.Err(err).Msg("error while handing computation results")
   430  		return
   431  	}
   432  
   433  	receipt := computationResult.ExecutionReceipt
   434  	broadcasted, err := e.providerEngine.BroadcastExecutionReceipt(
   435  		ctx, executableBlock.Block.Header.Height, receipt)
   436  	if err != nil {
   437  		lg.Err(err).Msg("critical: failed to broadcast the receipt")
   438  	}
   439  
   440  	finalEndState := computationResult.CurrentEndState()
   441  	lg.Info().
   442  		Hex("parent_block", executableBlock.Block.Header.ParentID[:]).
   443  		Int("collections", len(executableBlock.Block.Payload.Guarantees)).
   444  		Hex("start_state", executableBlock.StartState[:]).
   445  		Hex("final_state", finalEndState[:]).
   446  		Hex("receipt_id", logging.Entity(receipt)).
   447  		Hex("result_id", logging.Entity(receipt.ExecutionResult)).
   448  		Hex("execution_data_id", receipt.ExecutionResult.ExecutionDataID[:]).
   449  		Bool("state_changed", finalEndState != *executableBlock.StartState).
   450  		Uint64("num_txs", nonSystemTransactionCount(receipt.ExecutionResult)).
   451  		Bool("broadcasted", broadcasted).
   452  		Int64("timeSpentInMS", time.Since(startedAt).Milliseconds()).
   453  		Msg("block executed")
   454  
   455  	e.stopControl.OnBlockExecuted(executableBlock.Block.Header)
   456  
   457  	err = e.onBlockExecuted(executableBlock, finalEndState)
   458  	if err != nil {
   459  		lg.Err(err).Msg("failed in process block's children")
   460  	}
   461  
   462  	if e.executionDataPruner != nil {
   463  		e.executionDataPruner.NotifyFulfilledHeight(executableBlock.Height())
   464  	}
   465  
   466  	e.unit.Ctx()
   467  
   468  }
   469  
   470  func nonSystemTransactionCount(result flow.ExecutionResult) uint64 {
   471  	count := uint64(0)
   472  	for _, chunk := range result.Chunks {
   473  		count += chunk.NumberOfTransactions
   474  	}
   475  	return count
   476  }
   477  
   478  // we've executed the block, now we need to check:
   479  // 1. whether the state syncing can be turned off
   480  // 2. whether its children can be executed
   481  //   the executionQueues stores blocks as a tree:
   482  //
   483  //   10 <- 11 <- 12
   484  //   	 ^-- 13
   485  //   14 <- 15 <- 16
   486  //
   487  //   if block 10 is the one just executed, then we will remove it from the queue, and add
   488  //   its children back, meaning the tree will become:
   489  //
   490  //   11 <- 12
   491  //   13
   492  //   14 <- 15 <- 16
   493  
   494  func (e *Engine) onBlockExecuted(
   495  	executed *entity.ExecutableBlock,
   496  	finalState flow.StateCommitment,
   497  ) error {
   498  
   499  	e.metrics.ExecutionStorageStateCommitment(int64(len(finalState)))
   500  	e.metrics.ExecutionLastExecutedBlockHeight(executed.Block.Header.Height)
   501  
   502  	missingCollections := make(map[*entity.ExecutableBlock][]*flow.CollectionGuarantee)
   503  	err := e.mempool.Run(
   504  		func(
   505  			blockByCollection *stdmap.BlockByCollectionBackdata,
   506  			executionQueues *stdmap.QueuesBackdata,
   507  		) error {
   508  			// find the block that was just executed
   509  			executionQueue, exists := executionQueues.ByID(executed.ID())
   510  			if !exists {
   511  				logQueueState(e.log, executionQueues, executed.ID())
   512  				// when the block no longer exists in the queue, it means there was a race condition that
   513  				// two onBlockExecuted was called for the same block, and one process has already removed the
   514  				// block from the queue, so we will print an error here
   515  				return fmt.Errorf("block has been executed already, no longer exists in the queue")
   516  			}
   517  
   518  			// dismount the executed block and all its children
   519  			_, newQueues := executionQueue.Dismount()
   520  
   521  			// go through each children, add them back to the queue, and check
   522  			// if the children is executable
   523  			for _, queue := range newQueues {
   524  				queueID := queue.ID()
   525  				added := executionQueues.Add(queueID, queue)
   526  				if !added {
   527  					// blocks should be unique in execution queues, if we dismount all the children blocks, then
   528  					// add it back to the queues, then it should always be able to add.
   529  					// If not, then there is a bug that the queues have duplicated blocks
   530  					return fmt.Errorf("fatal error - child block already in execution queue")
   531  				}
   532  
   533  				// the parent block has been executed, update the StartState of
   534  				// each child block.
   535  				child := queue.Head.Item.(*entity.ExecutableBlock)
   536  				child.StartState = &finalState
   537  
   538  				missing, err := e.matchAndFindMissingCollections(child, blockByCollection)
   539  				if err != nil {
   540  					return fmt.Errorf("cannot send collection requests: %w", err)
   541  				}
   542  				if len(missing) > 0 {
   543  					missingCollections[child] = append(missingCollections[child], missing...)
   544  				}
   545  
   546  				completed := e.executeBlockIfComplete(child)
   547  				if !completed {
   548  					e.log.Debug().
   549  						Hex("executed_block", logging.Entity(executed)).
   550  						Hex("child_block", logging.Entity(child)).
   551  						Msg("child block is not ready to be executed yet")
   552  				} else {
   553  					e.log.Debug().
   554  						Hex("executed_block", logging.Entity(executed)).
   555  						Hex("child_block", logging.Entity(child)).
   556  						Msg("child block is ready to be executed")
   557  				}
   558  			}
   559  
   560  			// remove the executed block
   561  			executionQueues.Remove(executed.ID())
   562  
   563  			return nil
   564  		})
   565  
   566  	if err != nil {
   567  		e.log.Fatal().Err(err).
   568  			Hex("block", logging.Entity(executed)).
   569  			Uint64("height", executed.Block.Header.Height).
   570  			Msg("error while requeueing blocks after execution")
   571  	}
   572  
   573  	for child, missing := range missingCollections {
   574  		err := e.addOrFetch(child.ID(), child.Block.Header.Height, missing)
   575  		if err != nil {
   576  			return fmt.Errorf("fail to add missing collections: %w", err)
   577  		}
   578  	}
   579  
   580  	return nil
   581  }
   582  
   583  // executeBlockIfComplete checks whether the block is ready to be executed.
   584  // if yes, execute the block
   585  // return a bool indicates whether the block was completed
   586  func (e *Engine) executeBlockIfComplete(eb *entity.ExecutableBlock) bool {
   587  
   588  	if eb.Executing {
   589  		return false
   590  	}
   591  
   592  	// if don't have the delta, then check if everything is ready for executing
   593  	// the block
   594  	if eb.IsComplete() {
   595  
   596  		if e.extensiveLogging {
   597  			e.logExecutableBlock(eb)
   598  		}
   599  
   600  		// no external synchronisation is used because this method must be run in a thread-safe context
   601  		eb.Executing = true
   602  
   603  		e.unit.Launch(func() {
   604  			e.executeBlock(e.unit.Ctx(), eb)
   605  		})
   606  		return true
   607  	}
   608  	return false
   609  }
   610  
   611  // OnCollection is a callback for handling the collections requested by the
   612  // collection requester.
   613  func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) {
   614  	// convert entity to strongly typed collection
   615  	collection, ok := entity.(*flow.Collection)
   616  	if !ok {
   617  		e.log.Error().Msgf("invalid entity type (%T)", entity)
   618  		return
   619  	}
   620  
   621  	// no need to validate the origin ID, since the collection requester has
   622  	// checked the origin must be a collection node.
   623  
   624  	err := e.handleCollection(originID, collection)
   625  	if err != nil {
   626  		e.log.Error().Err(err).Msg("could not handle collection")
   627  	}
   628  }
   629  
   630  // a block can't be executed if its collection is missing.
   631  // since a collection can belong to multiple blocks, we need to
   632  // find all the blocks that are needing this collection, and then
   633  // check if any of these block becomes executable and execute it if
   634  // is.
   635  func (e *Engine) handleCollection(
   636  	originID flow.Identifier,
   637  	collection *flow.Collection,
   638  ) error {
   639  	collID := collection.ID()
   640  
   641  	span, _ := e.tracer.StartCollectionSpan(context.Background(), collID, trace.EXEHandleCollection)
   642  	defer span.End()
   643  
   644  	lg := e.log.With().Hex("collection_id", collID[:]).Logger()
   645  
   646  	lg.Info().Hex("sender", originID[:]).Int("len", collection.Len()).Msg("handle collection")
   647  	defer func(startTime time.Time) {
   648  		lg.Info().TimeDiff("duration", time.Now(), startTime).Msg("collection handled")
   649  	}(time.Now())
   650  
   651  	// TODO: bail if have seen this collection before.
   652  	err := e.collections.Store(collection)
   653  	if err != nil {
   654  		return fmt.Errorf("cannot store collection: %w", err)
   655  	}
   656  
   657  	return e.mempool.BlockByCollection.Run(
   658  		func(backdata *stdmap.BlockByCollectionBackdata) error {
   659  			return e.addCollectionToMempool(collection, backdata)
   660  		},
   661  	)
   662  }
   663  
   664  func (e *Engine) addCollectionToMempool(
   665  	collection *flow.Collection,
   666  	backdata *stdmap.BlockByCollectionBackdata,
   667  ) error {
   668  	collID := collection.ID()
   669  	blockByCollectionID, exists := backdata.ByID(collID)
   670  
   671  	// if we don't find any block for this collection, then
   672  	// means we don't need this collection any more.
   673  	// or it was ejected from the mempool when it was full.
   674  	// either way, we will return
   675  	if !exists {
   676  		return nil
   677  	}
   678  
   679  	for _, executableBlock := range blockByCollectionID.ExecutableBlocks {
   680  		blockID := executableBlock.ID()
   681  
   682  		completeCollection, ok := executableBlock.CompleteCollections[collID]
   683  		if !ok {
   684  			return fmt.Errorf("cannot handle collection: internal inconsistency - collection pointing to block %v which does not contain said collection",
   685  				blockID)
   686  		}
   687  
   688  		e.metrics.UpdateCollectionMaxHeight(executableBlock.Block.Header.Height)
   689  
   690  		if completeCollection.IsCompleted() {
   691  			// already received transactions for this collection
   692  			continue
   693  		}
   694  
   695  		// update the transactions of the collection
   696  		// Note: it's guaranteed the transactions are for this collection, because
   697  		// the collection id matches with the CollectionID from the collection guarantee
   698  		completeCollection.Transactions = collection.Transactions
   699  
   700  		// check if the block becomes executable
   701  		_ = e.executeBlockIfComplete(executableBlock)
   702  	}
   703  
   704  	// since we've received this collection, remove it from the index
   705  	// this also prevents from executing the same block twice, because the second
   706  	// time when the collection arrives, it will not be found in the blockByCollectionID
   707  	// index.
   708  	backdata.Remove(collID)
   709  
   710  	return nil
   711  }
   712  
   713  func newQueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) (
   714  	*queue.Queue,
   715  	bool,
   716  ) {
   717  	q := queue.NewQueue(blockify)
   718  	qID := q.ID()
   719  	return q, queues.Add(qID, q)
   720  }
   721  
   722  // enqueue adds a block to the queues, return the queue that includes the block and booleans
   723  // * is block new one (it's not already enqueued, not a duplicate)
   724  // * is head of the queue (new queue has been created)
   725  //
   726  // Queues are chained blocks. Since a block can't be executable until its parent has been
   727  // executed, the chained structure allows us to only check the head of each queue to see if
   728  // any block becomes executable.
   729  // for instance we have one queue whose head is A:
   730  //
   731  //	A <- B <- C
   732  //	  ^- D <- E
   733  //
   734  // If we receive E <- F, then we will add it to the queue:
   735  //
   736  //	A <- B <- C
   737  //	  ^- D <- E <- F
   738  //
   739  // Even through there are 6 blocks, we only need to check if block A becomes executable.
   740  // when the parent block isn't in the queue, we add it as a new queue. for instance, if
   741  // we receive H <- G, then the queues will become:
   742  //
   743  //	A <- B <- C
   744  //	  ^- D <- E
   745  //	G
   746  func enqueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) (
   747  	*queue.Queue,
   748  	bool,
   749  	bool,
   750  ) {
   751  	for _, queue := range queues.All() {
   752  		if stored, isNew := queue.TryAdd(blockify); stored {
   753  			return queue, isNew, false
   754  		}
   755  	}
   756  	queue, isNew := newQueue(blockify, queues)
   757  	return queue, isNew, true
   758  }
   759  
   760  // check if the block's collections have been received,
   761  // if yes, add the collection to the executable block
   762  // if no, fetch the collection.
   763  // if a block has 3 collection, it would be 3 reqs to fetch them.
   764  // mark the collection belongs to the block,
   765  // mark the block contains this collection.
   766  // It returns the missing collections to be fetched
   767  // TODO: to rename
   768  func (e *Engine) matchAndFindMissingCollections(
   769  	executableBlock *entity.ExecutableBlock,
   770  	collectionsBackdata *stdmap.BlockByCollectionBackdata,
   771  ) ([]*flow.CollectionGuarantee, error) {
   772  	missingCollections := make([]*flow.CollectionGuarantee, 0, len(executableBlock.Block.Payload.Guarantees))
   773  
   774  	for _, guarantee := range executableBlock.Block.Payload.Guarantees {
   775  		coll := &entity.CompleteCollection{
   776  			Guarantee: guarantee,
   777  		}
   778  		executableBlock.CompleteCollections[guarantee.ID()] = coll
   779  
   780  		// check if we have requested this collection before.
   781  		// blocksNeedingCollection stores all the blocks that contain this collection
   782  
   783  		if blocksNeedingCollection, exists := collectionsBackdata.ByID(guarantee.ID()); exists {
   784  			// if we've requested this collection, it means other block might also contain this collection.
   785  			// in this case, add this block to the map so that when the collection is received,
   786  			// we could update the executable block
   787  			blocksNeedingCollection.ExecutableBlocks[executableBlock.ID()] = executableBlock
   788  
   789  			// since the collection is still being requested, we don't have the transactions
   790  			// yet, so exit
   791  			continue
   792  		}
   793  
   794  		// the storage doesn't have this collection, meaning this is our first time seeing this
   795  		// collection guarantee, create an entry to store in collectionsBackdata in order to
   796  		// update the executable blocks when the collection is received.
   797  		blocksNeedingCollection := &entity.BlocksByCollection{
   798  			CollectionID:     guarantee.ID(),
   799  			ExecutableBlocks: map[flow.Identifier]*entity.ExecutableBlock{executableBlock.ID(): executableBlock},
   800  		}
   801  
   802  		added := collectionsBackdata.Add(blocksNeedingCollection.ID(), blocksNeedingCollection)
   803  		if !added {
   804  			// sanity check, should not happen, unless mempool implementation has a bug
   805  			return nil, fmt.Errorf("collection already mapped to block")
   806  		}
   807  
   808  		missingCollections = append(missingCollections, guarantee)
   809  	}
   810  
   811  	return missingCollections, nil
   812  }
   813  
   814  // save the execution result of a block
   815  func (e *Engine) saveExecutionResults(
   816  	ctx context.Context,
   817  	result *execution.ComputationResult,
   818  ) error {
   819  	span, childCtx := e.tracer.StartSpanFromContext(ctx, trace.EXESaveExecutionResults)
   820  	defer span.End()
   821  
   822  	e.log.Debug().
   823  		Hex("block_id", logging.Entity(result.ExecutableBlock)).
   824  		Msg("received computation result")
   825  
   826  	for _, event := range result.ExecutionResult.ServiceEvents {
   827  		e.log.Info().
   828  			Uint64("block_height", result.ExecutableBlock.Height()).
   829  			Hex("block_id", logging.Entity(result.ExecutableBlock)).
   830  			Str("event_type", event.Type.String()).
   831  			Msg("service event emitted")
   832  	}
   833  
   834  	err := e.execState.SaveExecutionResults(childCtx, result)
   835  	if err != nil {
   836  		return fmt.Errorf("cannot persist execution state: %w", err)
   837  	}
   838  
   839  	finalEndState := result.CurrentEndState()
   840  	e.log.Debug().
   841  		Hex("block_id", logging.Entity(result.ExecutableBlock)).
   842  		Hex("start_state", result.ExecutableBlock.StartState[:]).
   843  		Hex("final_state", finalEndState[:]).
   844  		Msg("saved computation results")
   845  
   846  	return nil
   847  }
   848  
   849  // logExecutableBlock logs all data about an executable block
   850  // over time we should skip this
   851  func (e *Engine) logExecutableBlock(eb *entity.ExecutableBlock) {
   852  	// log block
   853  	e.log.Debug().
   854  		Hex("block_id", logging.Entity(eb)).
   855  		Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)).
   856  		Uint64("block_height", eb.Block.Header.Height).
   857  		Int("number_of_collections", len(eb.Collections())).
   858  		RawJSON("block_header", logging.AsJSON(eb.Block.Header)).
   859  		Msg("extensive log: block header")
   860  
   861  	// logs transactions
   862  	for i, col := range eb.Collections() {
   863  		for j, tx := range col.Transactions {
   864  			e.log.Debug().
   865  				Hex("block_id", logging.Entity(eb)).
   866  				Int("block_height", int(eb.Block.Header.Height)).
   867  				Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)).
   868  				Int("collection_index", i).
   869  				Int("tx_index", j).
   870  				Hex("collection_id", logging.ID(col.Guarantee.CollectionID)).
   871  				Hex("tx_hash", logging.Entity(tx)).
   872  				Hex("start_state_commitment", eb.StartState[:]).
   873  				RawJSON("transaction", logging.AsJSON(tx)).
   874  				Msg("extensive log: executed tx content")
   875  		}
   876  	}
   877  }
   878  
   879  // addOrFetch checks if there are stored collections for the given guarantees, if there is,
   880  // forward them to mempool to process the collection, otherwise fetch the collections.
   881  // any error returned are exception
   882  func (e *Engine) addOrFetch(
   883  	blockID flow.Identifier,
   884  	height uint64,
   885  	guarantees []*flow.CollectionGuarantee,
   886  ) error {
   887  	return e.fetchAndHandleCollection(blockID, height, guarantees, func(collection *flow.Collection) error {
   888  		err := e.mempool.BlockByCollection.Run(
   889  			func(backdata *stdmap.BlockByCollectionBackdata) error {
   890  				return e.addCollectionToMempool(collection, backdata)
   891  			})
   892  
   893  		if err != nil {
   894  			return fmt.Errorf("could not add collection to mempool: %w", err)
   895  		}
   896  		return nil
   897  	})
   898  }
   899  
   900  // addOrFetch checks if there are stored collections for the given guarantees, if there is,
   901  // forward them to the handler to process the collection, otherwise fetch the collections.
   902  // any error returned are exception
   903  func (e *Engine) fetchAndHandleCollection(
   904  	blockID flow.Identifier,
   905  	height uint64,
   906  	guarantees []*flow.CollectionGuarantee,
   907  	handleCollection func(*flow.Collection) error,
   908  ) error {
   909  	fetched := false
   910  	for _, guarantee := range guarantees {
   911  		// if we've requested this collection, we will store it in the storage,
   912  		// so check the storage to see whether we've seen it.
   913  		collection, err := e.collections.ByID(guarantee.CollectionID)
   914  
   915  		if err == nil {
   916  			// we found the collection from storage, forward this collection to handler
   917  			err = handleCollection(collection)
   918  			if err != nil {
   919  				return fmt.Errorf("could not handle collection: %w", err)
   920  			}
   921  
   922  			continue
   923  		}
   924  
   925  		// check if there was exception
   926  		if !errors.Is(err, storage.ErrNotFound) {
   927  			return fmt.Errorf("error while querying for collection: %w", err)
   928  		}
   929  
   930  		err = e.collectionFetcher.FetchCollection(blockID, height, guarantee)
   931  		if err != nil {
   932  			return fmt.Errorf("could not fetch collection: %w", err)
   933  		}
   934  		fetched = true
   935  	}
   936  
   937  	// make sure that the requests are dispatched immediately by the requester
   938  	if fetched {
   939  		e.collectionFetcher.Force()
   940  		e.metrics.ExecutionCollectionRequestSent()
   941  	}
   942  
   943  	return nil
   944  }
   945  
   946  func logQueueState(log zerolog.Logger, queues *stdmap.QueuesBackdata, blockID flow.Identifier) {
   947  	all := queues.All()
   948  
   949  	log.With().Hex("queue_state__executed_block_id", blockID[:]).Int("count", len(all)).Logger()
   950  	for i, queue := range all {
   951  		log.Error().Msgf("%v-th queue state: %v", i, queue.String())
   952  	}
   953  }