github.com/onflow/flow-go@v0.33.17/engine/execution/ingestion/engine.go (about)

     1  package ingestion
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/rs/zerolog"
    11  	"github.com/rs/zerolog/log"
    12  
    13  	"github.com/onflow/flow-go/engine"
    14  	"github.com/onflow/flow-go/engine/execution"
    15  	"github.com/onflow/flow-go/engine/execution/computation"
    16  	"github.com/onflow/flow-go/engine/execution/ingestion/stop"
    17  	"github.com/onflow/flow-go/engine/execution/ingestion/uploader"
    18  	"github.com/onflow/flow-go/engine/execution/provider"
    19  	"github.com/onflow/flow-go/engine/execution/state"
    20  	"github.com/onflow/flow-go/model/flow"
    21  	"github.com/onflow/flow-go/module"
    22  	"github.com/onflow/flow-go/module/executiondatasync/pruner"
    23  	"github.com/onflow/flow-go/module/mempool/entity"
    24  	"github.com/onflow/flow-go/module/mempool/queue"
    25  	"github.com/onflow/flow-go/module/mempool/stdmap"
    26  	"github.com/onflow/flow-go/module/trace"
    27  	"github.com/onflow/flow-go/network"
    28  	"github.com/onflow/flow-go/network/channels"
    29  	psEvents "github.com/onflow/flow-go/state/protocol/events"
    30  	"github.com/onflow/flow-go/storage"
    31  	"github.com/onflow/flow-go/utils/logging"
    32  )
    33  
    34  // An Engine receives and saves incoming blocks.
    35  type Engine struct {
    36  	psEvents.Noop // satisfy protocol events consumer interface
    37  
    38  	unit                *engine.Unit
    39  	log                 zerolog.Logger
    40  	me                  module.Local
    41  	collectionFetcher   CollectionFetcher
    42  	headers             storage.Headers // see comments on getHeaderByHeight for why we need it
    43  	blocks              storage.Blocks
    44  	collections         storage.Collections
    45  	computationManager  computation.ComputationManager
    46  	providerEngine      provider.ProviderEngine
    47  	mempool             *Mempool
    48  	execState           state.ExecutionState
    49  	metrics             module.ExecutionMetrics
    50  	maxCollectionHeight uint64
    51  	tracer              module.Tracer
    52  	extensiveLogging    bool
    53  	executionDataPruner *pruner.Pruner
    54  	uploader            *uploader.Manager
    55  	stopControl         *stop.StopControl
    56  	loader              BlockLoader
    57  }
    58  
    59  func New(
    60  	unit *engine.Unit,
    61  	logger zerolog.Logger,
    62  	net network.EngineRegistry,
    63  	me module.Local,
    64  	collectionFetcher CollectionFetcher,
    65  	headers storage.Headers,
    66  	blocks storage.Blocks,
    67  	collections storage.Collections,
    68  	executionEngine computation.ComputationManager,
    69  	providerEngine provider.ProviderEngine,
    70  	execState state.ExecutionState,
    71  	metrics module.ExecutionMetrics,
    72  	tracer module.Tracer,
    73  	extLog bool,
    74  	pruner *pruner.Pruner,
    75  	uploader *uploader.Manager,
    76  	stopControl *stop.StopControl,
    77  	loader BlockLoader,
    78  ) (*Engine, error) {
    79  	log := logger.With().Str("engine", "ingestion").Logger()
    80  
    81  	mempool := newMempool()
    82  
    83  	eng := Engine{
    84  		unit:                unit,
    85  		log:                 log,
    86  		me:                  me,
    87  		collectionFetcher:   collectionFetcher,
    88  		headers:             headers,
    89  		blocks:              blocks,
    90  		collections:         collections,
    91  		computationManager:  executionEngine,
    92  		providerEngine:      providerEngine,
    93  		mempool:             mempool,
    94  		execState:           execState,
    95  		metrics:             metrics,
    96  		maxCollectionHeight: 0,
    97  		tracer:              tracer,
    98  		extensiveLogging:    extLog,
    99  		executionDataPruner: pruner,
   100  		uploader:            uploader,
   101  		stopControl:         stopControl,
   102  		loader:              loader,
   103  	}
   104  
   105  	return &eng, nil
   106  }
   107  
   108  // Ready returns a channel that will close when the engine has
   109  // successfully started.
   110  func (e *Engine) Ready() <-chan struct{} {
   111  	if e.stopControl.IsExecutionStopped() {
   112  		return e.unit.Ready()
   113  	}
   114  
   115  	if err := e.uploader.RetryUploads(); err != nil {
   116  		e.log.Warn().Msg("failed to re-upload all ComputationResults")
   117  	}
   118  
   119  	err := e.reloadUnexecutedBlocks()
   120  	if err != nil {
   121  		e.log.Fatal().Err(err).Msg("failed to load all unexecuted blocks")
   122  	}
   123  
   124  	return e.unit.Ready()
   125  }
   126  
   127  // Done returns a channel that will close when the engine has
   128  // successfully stopped.
   129  func (e *Engine) Done() <-chan struct{} {
   130  	return e.unit.Done()
   131  }
   132  
   133  // SubmitLocal submits an event originating on the local node.
   134  func (e *Engine) SubmitLocal(event interface{}) {
   135  	e.unit.Launch(func() {
   136  		err := e.process(e.me.NodeID(), event)
   137  		if err != nil {
   138  			engine.LogError(e.log, err)
   139  		}
   140  	})
   141  }
   142  
   143  // Submit submits the given event from the node with the given origin ID
   144  // for processing in a non-blocking manner. It returns instantly and logs
   145  // a potential processing error internally when done.
   146  func (e *Engine) Submit(
   147  	channel channels.Channel,
   148  	originID flow.Identifier,
   149  	event interface{},
   150  ) {
   151  	e.unit.Launch(func() {
   152  		err := e.process(originID, event)
   153  		if err != nil {
   154  			engine.LogError(e.log, err)
   155  		}
   156  	})
   157  }
   158  
   159  // ProcessLocal processes an event originating on the local node.
   160  func (e *Engine) ProcessLocal(event interface{}) error {
   161  	return fmt.Errorf("ingestion error does not process local events")
   162  }
   163  
   164  func (e *Engine) Process(
   165  	channel channels.Channel,
   166  	originID flow.Identifier,
   167  	event interface{},
   168  ) error {
   169  	return e.unit.Do(func() error {
   170  		return e.process(originID, event)
   171  	})
   172  }
   173  
   174  func (e *Engine) process(originID flow.Identifier, event interface{}) error {
   175  	return nil
   176  }
   177  
   178  // on nodes startup, we need to load all the unexecuted blocks to the execution queues.
   179  // blocks have to be loaded in the way that the parent has been loaded before loading its children
   180  func (e *Engine) reloadUnexecutedBlocks() error {
   181  	unexecuted, err := e.loader.LoadUnexecuted(e.unit.Ctx())
   182  	if err != nil {
   183  		return fmt.Errorf("could not load unexecuted blocks: %w", err)
   184  	}
   185  	// it's possible the BlockProcessable is called during the reloading, as the follower engine
   186  	// will receive blocks before ingestion engine is ready.
   187  	// The problem with that is, since the reloading hasn't finished yet, enqueuing the new block from
   188  	// the BlockProcessable callback will fail, because its parent block might have not been reloaded
   189  	// to the queues yet.
   190  	// So one solution here is to lock the execution queues during reloading, so that if BlockProcessable
   191  	// is called before reloading is finished, it will be blocked, which will avoid that edge case.
   192  	return e.mempool.Run(func(
   193  		blockByCollection *stdmap.BlockByCollectionBackdata,
   194  		executionQueues *stdmap.QueuesBackdata,
   195  	) error {
   196  		for _, blockID := range unexecuted {
   197  			err := e.reloadBlock(blockByCollection, executionQueues, blockID)
   198  			if err != nil {
   199  				return fmt.Errorf("could not reload block: %v, %w", blockID, err)
   200  			}
   201  
   202  			e.log.Debug().Hex("block_id", blockID[:]).Msg("reloaded block")
   203  		}
   204  
   205  		e.log.Info().Int("count", len(unexecuted)).Msg("all unexecuted have been successfully reloaded")
   206  
   207  		return nil
   208  	})
   209  }
   210  
   211  func (e *Engine) reloadBlock(
   212  	blockByCollection *stdmap.BlockByCollectionBackdata,
   213  	executionQueues *stdmap.QueuesBackdata,
   214  	blockID flow.Identifier,
   215  ) error {
   216  	block, err := e.blocks.ByID(blockID)
   217  	if err != nil {
   218  		return fmt.Errorf("could not get block by ID: %v %w", blockID, err)
   219  	}
   220  
   221  	// enqueue the block and check if there is any missing collections
   222  	missingCollections, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false)
   223  
   224  	if err != nil {
   225  		return fmt.Errorf("could not enqueue block %x on reloading: %w", blockID, err)
   226  	}
   227  
   228  	// forward the missing collections to requester engine for requesting them from collection nodes,
   229  	// adding the missing collections to mempool in order to trigger the block execution as soon as
   230  	// all missing collections are received.
   231  	err = e.fetchAndHandleCollection(blockID, block.Header.Height, missingCollections, func(collection *flow.Collection) error {
   232  		err := e.addCollectionToMempool(collection, blockByCollection)
   233  
   234  		if err != nil {
   235  			return fmt.Errorf("could not add collection to mempool: %w", err)
   236  		}
   237  		return nil
   238  	})
   239  
   240  	if err != nil {
   241  		return fmt.Errorf("could not fetch or handle collection %w", err)
   242  	}
   243  	return nil
   244  }
   245  
   246  // BlockProcessable handles the new verified blocks (blocks that
   247  // have passed consensus validation) received from the consensus nodes
   248  // NOTE: BlockProcessable might be called multiple times for the same block.
   249  // NOTE: Ready calls reloadUnexecutedBlocks during initialization, which handles dropped protocol events.
   250  func (e *Engine) BlockProcessable(b *flow.Header, _ *flow.QuorumCertificate) {
   251  
   252  	// TODO: this should not be blocking: https://github.com/onflow/flow-go/issues/4400
   253  
   254  	// skip if stopControl tells to skip
   255  	if !e.stopControl.ShouldExecuteBlock(b) {
   256  		return
   257  	}
   258  
   259  	blockID := b.ID()
   260  	newBlock, err := e.blocks.ByID(blockID)
   261  	if err != nil {
   262  		e.log.Fatal().Err(err).Msgf("could not get incorporated block(%v): %v", blockID, err)
   263  	}
   264  
   265  	e.log.Info().Hex("block_id", blockID[:]).
   266  		Uint64("height", b.Height).
   267  		Msg("handling new block")
   268  
   269  	err = e.handleBlock(e.unit.Ctx(), newBlock)
   270  	if err != nil {
   271  		e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to handle block")
   272  	}
   273  }
   274  
   275  // Main handling
   276  
   277  // handle block will process the incoming block.
   278  // the block has passed the consensus validation.
   279  func (e *Engine) handleBlock(ctx context.Context, block *flow.Block) error {
   280  
   281  	blockID := block.ID()
   282  	log := e.log.With().Hex("block_id", blockID[:]).Logger()
   283  
   284  	span, _ := e.tracer.StartBlockSpan(ctx, blockID, trace.EXEHandleBlock)
   285  	defer span.End()
   286  
   287  	executed, err := e.execState.IsBlockExecuted(block.Header.Height, blockID)
   288  	if err != nil {
   289  		return fmt.Errorf("could not check whether block is executed: %w", err)
   290  	}
   291  
   292  	if executed {
   293  		log.Debug().Msg("block has been executed already")
   294  		return nil
   295  	}
   296  
   297  	var missingCollections []*flow.CollectionGuarantee
   298  	// unexecuted block
   299  	// acquiring the lock so that there is only one process modifying the queue
   300  	err = e.mempool.Run(func(
   301  		blockByCollection *stdmap.BlockByCollectionBackdata,
   302  		executionQueues *stdmap.QueuesBackdata,
   303  	) error {
   304  		missing, err := e.enqueueBlockAndCheckExecutable(blockByCollection, executionQueues, block, false)
   305  		if err != nil {
   306  			return err
   307  		}
   308  		missingCollections = missing
   309  		return nil
   310  	})
   311  
   312  	if err != nil {
   313  		return fmt.Errorf("could not enqueue block %v: %w", blockID, err)
   314  	}
   315  
   316  	return e.addOrFetch(blockID, block.Header.Height, missingCollections)
   317  }
   318  
   319  func (e *Engine) enqueueBlockAndCheckExecutable(
   320  	blockByCollection *stdmap.BlockByCollectionBackdata,
   321  	executionQueues *stdmap.QueuesBackdata,
   322  	block *flow.Block,
   323  	checkStateSync bool,
   324  ) ([]*flow.CollectionGuarantee, error) {
   325  	executableBlock := &entity.ExecutableBlock{
   326  		Block:               block,
   327  		CompleteCollections: make(map[flow.Identifier]*entity.CompleteCollection),
   328  	}
   329  
   330  	blockID := executableBlock.ID()
   331  
   332  	lg := e.log.With().
   333  		Hex("block_id", blockID[:]).
   334  		Uint64("block_height", executableBlock.Block.Header.Height).
   335  		Logger()
   336  
   337  	// adding the block to the queue,
   338  	queue, added, head := enqueue(executableBlock, executionQueues)
   339  
   340  	// if it's not added, it means the block is not a new block, it already
   341  	// exists in the queue, then bail
   342  	if !added {
   343  		log.Debug().Hex("block_id", logging.Entity(executableBlock)).
   344  			Int("block_height", int(executableBlock.Height())).
   345  			Msg("block already exists in the execution queue")
   346  		return nil, nil
   347  	}
   348  
   349  	firstUnexecutedHeight := queue.Head.Item.Height()
   350  
   351  	// check if a block is executable.
   352  	// a block is executable if the following conditions are all true
   353  	// 1) the parent state commitment is ready
   354  	// 2) the collections for the block payload are ready
   355  	// 3) the child block is ready for querying the randomness
   356  
   357  	// check if the block's parent has been executed. (we can't execute the block if the parent has
   358  	// not been executed yet)
   359  	// check if there is a statecommitment for the parent block
   360  	parentCommitment, err := e.execState.StateCommitmentByBlockID(block.Header.ParentID)
   361  
   362  	// if we found the statecommitment for the parent block, then add it to the executable block.
   363  	if err == nil {
   364  		executableBlock.StartState = &parentCommitment
   365  	} else if errors.Is(err, storage.ErrNotFound) {
   366  		// the parent block is an unexecuted block.
   367  		// if the queue only has one block, and its parent doesn't
   368  		// exist in the queue, then we need to load the block from the storage.
   369  		_, ok := queue.Nodes[blockID]
   370  		if !ok {
   371  			lg.Error().Msgf("an unexecuted parent block is missing in the queue")
   372  		}
   373  	} else {
   374  		// if there is exception, then crash
   375  		lg.Fatal().Err(err).Msg("unexpected error while accessing storage, shutting down")
   376  	}
   377  
   378  	// check if we have all the collections for the block, and request them if there is missing.
   379  	missingCollections, err := e.matchAndFindMissingCollections(executableBlock, blockByCollection)
   380  	if err != nil {
   381  		return nil, fmt.Errorf("cannot send collection requests: %w", err)
   382  	}
   383  
   384  	complete := false
   385  
   386  	// if newly enqueued block is inside any existing queue, we should skip now and wait
   387  	// for parent to finish execution
   388  	if head {
   389  		// execute the block if the block is ready to be executed
   390  		complete = e.executeBlockIfComplete(executableBlock)
   391  	}
   392  
   393  	lg.Info().
   394  		// if the execution is halt, but the queue keeps growing, we could check which block
   395  		// hasn't been executed.
   396  		Uint64("first_unexecuted_in_queue", firstUnexecutedHeight).
   397  		Bool("complete", complete).
   398  		Bool("head_of_queue", head).
   399  		Int("cols", len(executableBlock.Block.Payload.Guarantees)).
   400  		Int("missing_cols", len(missingCollections)).
   401  		Msg("block is enqueued")
   402  
   403  	return missingCollections, nil
   404  }
   405  
   406  // executeBlock will execute the block.
   407  // When finish executing, it will check if the children becomes executable and execute them if yes.
   408  func (e *Engine) executeBlock(
   409  	ctx context.Context,
   410  	executableBlock *entity.ExecutableBlock,
   411  ) {
   412  	lg := e.log.With().
   413  		Hex("block_id", logging.Entity(executableBlock)).
   414  		Uint64("height", executableBlock.Block.Header.Height).
   415  		Int("collections", len(executableBlock.CompleteCollections)).
   416  		Logger()
   417  
   418  	lg.Info().Msg("executing block")
   419  
   420  	startedAt := time.Now()
   421  
   422  	span, ctx := e.tracer.StartSpanFromContext(ctx, trace.EXEExecuteBlock)
   423  	defer span.End()
   424  
   425  	parentID := executableBlock.Block.Header.ParentID
   426  	parentErID, err := e.execState.GetExecutionResultID(ctx, parentID)
   427  	if err != nil {
   428  		lg.Err(err).
   429  			Str("parentID", parentID.String()).
   430  			Msg("could not get execution result ID for parent block")
   431  		return
   432  	}
   433  
   434  	snapshot := e.execState.NewStorageSnapshot(*executableBlock.StartState,
   435  		executableBlock.Block.Header.ParentID,
   436  		executableBlock.Block.Header.Height-1,
   437  	)
   438  
   439  	computationResult, err := e.computationManager.ComputeBlock(
   440  		ctx,
   441  		parentErID,
   442  		executableBlock,
   443  		snapshot)
   444  	if err != nil {
   445  		lg.Err(err).Msg("error while computing block")
   446  		return
   447  	}
   448  
   449  	wg := sync.WaitGroup{}
   450  	wg.Add(1)
   451  	defer wg.Wait()
   452  
   453  	go func() {
   454  		defer wg.Done()
   455  		err := e.uploader.Upload(ctx, computationResult)
   456  		if err != nil {
   457  			lg.Err(err).Msg("error while uploading block")
   458  			// continue processing. uploads should not block execution
   459  		}
   460  	}()
   461  
   462  	err = e.saveExecutionResults(ctx, computationResult)
   463  	if errors.Is(err, storage.ErrDataMismatch) {
   464  		lg.Fatal().Err(err).Msg("fatal: trying to store different results for the same block")
   465  	}
   466  
   467  	if err != nil {
   468  		lg.Err(err).Msg("error while handing computation results")
   469  		return
   470  	}
   471  
   472  	receipt := computationResult.ExecutionReceipt
   473  	broadcasted, err := e.providerEngine.BroadcastExecutionReceipt(
   474  		ctx, executableBlock.Block.Header.Height, receipt)
   475  	if err != nil {
   476  		lg.Err(err).Msg("critical: failed to broadcast the receipt")
   477  	}
   478  
   479  	finalEndState := computationResult.CurrentEndState()
   480  	lg.Info().
   481  		Hex("parent_block", executableBlock.Block.Header.ParentID[:]).
   482  		Int("collections", len(executableBlock.Block.Payload.Guarantees)).
   483  		Hex("start_state", executableBlock.StartState[:]).
   484  		Hex("final_state", finalEndState[:]).
   485  		Hex("receipt_id", logging.Entity(receipt)).
   486  		Hex("result_id", logging.Entity(receipt.ExecutionResult)).
   487  		Hex("execution_data_id", receipt.ExecutionResult.ExecutionDataID[:]).
   488  		Bool("state_changed", finalEndState != *executableBlock.StartState).
   489  		Uint64("num_txs", nonSystemTransactionCount(receipt.ExecutionResult)).
   490  		Bool("broadcasted", broadcasted).
   491  		Int64("timeSpentInMS", time.Since(startedAt).Milliseconds()).
   492  		Msg("block executed")
   493  
   494  	err = e.onBlockExecuted(executableBlock, finalEndState)
   495  	if err != nil {
   496  		lg.Err(err).Msg("failed in process block's children")
   497  	}
   498  
   499  	if e.executionDataPruner != nil {
   500  		e.executionDataPruner.NotifyFulfilledHeight(executableBlock.Height())
   501  	}
   502  
   503  	e.stopControl.OnBlockExecuted(executableBlock.Block.Header)
   504  
   505  	e.unit.Ctx()
   506  
   507  }
   508  
   509  func nonSystemTransactionCount(result flow.ExecutionResult) uint64 {
   510  	count := uint64(0)
   511  	for _, chunk := range result.Chunks {
   512  		count += chunk.NumberOfTransactions
   513  	}
   514  	return count
   515  }
   516  
   517  // we've executed the block, now we need to check:
   518  // 1. whether the state syncing can be turned off
   519  // 2. whether its children can be executed
   520  //   the executionQueues stores blocks as a tree:
   521  //
   522  //   10 <- 11 <- 12
   523  //   	 ^-- 13
   524  //   14 <- 15 <- 16
   525  //
   526  //   if block 10 is the one just executed, then we will remove it from the queue, and add
   527  //   its children back, meaning the tree will become:
   528  //
   529  //   11 <- 12
   530  //   13
   531  //   14 <- 15 <- 16
   532  
   533  func (e *Engine) onBlockExecuted(
   534  	executed *entity.ExecutableBlock,
   535  	finalState flow.StateCommitment,
   536  ) error {
   537  
   538  	e.metrics.ExecutionStorageStateCommitment(int64(len(finalState)))
   539  	e.metrics.ExecutionLastExecutedBlockHeight(executed.Block.Header.Height)
   540  
   541  	missingCollections := make(map[*entity.ExecutableBlock][]*flow.CollectionGuarantee)
   542  	err := e.mempool.Run(
   543  		func(
   544  			blockByCollection *stdmap.BlockByCollectionBackdata,
   545  			executionQueues *stdmap.QueuesBackdata,
   546  		) error {
   547  			// find the block that was just executed
   548  			executionQueue, exists := executionQueues.ByID(executed.ID())
   549  			if !exists {
   550  				logQueueState(e.log, executionQueues, executed.ID())
   551  				// when the block no longer exists in the queue, it means there was a race condition that
   552  				// two onBlockExecuted was called for the same block, and one process has already removed the
   553  				// block from the queue, so we will print an error here
   554  				return fmt.Errorf("block has been executed already, no longer exists in the queue")
   555  			}
   556  
   557  			// dismount the executed block and all its children
   558  			_, newQueues := executionQueue.Dismount()
   559  
   560  			// go through each children, add them back to the queue, and check
   561  			// if the children is executable
   562  			for _, queue := range newQueues {
   563  				queueID := queue.ID()
   564  				added := executionQueues.Add(queueID, queue)
   565  				if !added {
   566  					// blocks should be unique in execution queues, if we dismount all the children blocks, then
   567  					// add it back to the queues, then it should always be able to add.
   568  					// If not, then there is a bug that the queues have duplicated blocks
   569  					return fmt.Errorf("fatal error - child block already in execution queue")
   570  				}
   571  
   572  				// the parent block has been executed, update the StartState of
   573  				// each child block.
   574  				child := queue.Head.Item.(*entity.ExecutableBlock)
   575  				child.StartState = &finalState
   576  
   577  				missing, err := e.matchAndFindMissingCollections(child, blockByCollection)
   578  				if err != nil {
   579  					return fmt.Errorf("cannot send collection requests: %w", err)
   580  				}
   581  				if len(missing) > 0 {
   582  					missingCollections[child] = append(missingCollections[child], missing...)
   583  				}
   584  
   585  				completed := e.executeBlockIfComplete(child)
   586  				if !completed {
   587  					e.log.Debug().
   588  						Hex("executed_block", logging.Entity(executed)).
   589  						Hex("child_block", logging.Entity(child)).
   590  						Msg("child block is not ready to be executed yet")
   591  				} else {
   592  					e.log.Debug().
   593  						Hex("executed_block", logging.Entity(executed)).
   594  						Hex("child_block", logging.Entity(child)).
   595  						Msg("child block is ready to be executed")
   596  				}
   597  			}
   598  
   599  			// remove the executed block
   600  			executionQueues.Remove(executed.ID())
   601  
   602  			return nil
   603  		})
   604  
   605  	if err != nil {
   606  		e.log.Fatal().Err(err).
   607  			Hex("block", logging.Entity(executed)).
   608  			Uint64("height", executed.Block.Header.Height).
   609  			Msg("error while requeueing blocks after execution")
   610  	}
   611  
   612  	for child, missing := range missingCollections {
   613  		err := e.addOrFetch(child.ID(), child.Block.Header.Height, missing)
   614  		if err != nil {
   615  			return fmt.Errorf("fail to add missing collections: %w", err)
   616  		}
   617  	}
   618  
   619  	return nil
   620  }
   621  
   622  // executeBlockIfComplete checks whether the block is ready to be executed.
   623  // if yes, execute the block
   624  // return a bool indicates whether the block was completed
   625  func (e *Engine) executeBlockIfComplete(eb *entity.ExecutableBlock) bool {
   626  
   627  	if eb.Executing {
   628  		return false
   629  	}
   630  
   631  	// if don't have the delta, then check if everything is ready for executing
   632  	// the block
   633  	if eb.IsComplete() {
   634  
   635  		if e.extensiveLogging {
   636  			e.logExecutableBlock(eb)
   637  		}
   638  
   639  		// no external synchronisation is used because this method must be run in a thread-safe context
   640  		eb.Executing = true
   641  
   642  		e.unit.Launch(func() {
   643  			e.executeBlock(e.unit.Ctx(), eb)
   644  		})
   645  		return true
   646  	}
   647  	return false
   648  }
   649  
   650  // OnCollection is a callback for handling the collections requested by the
   651  // collection requester.
   652  func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) {
   653  	// convert entity to strongly typed collection
   654  	collection, ok := entity.(*flow.Collection)
   655  	if !ok {
   656  		e.log.Error().Msgf("invalid entity type (%T)", entity)
   657  		return
   658  	}
   659  
   660  	// no need to validate the origin ID, since the collection requester has
   661  	// checked the origin must be a collection node.
   662  
   663  	err := e.handleCollection(originID, collection)
   664  	if err != nil {
   665  		e.log.Error().Err(err).Msg("could not handle collection")
   666  	}
   667  }
   668  
   669  // a block can't be executed if its collection is missing.
   670  // since a collection can belong to multiple blocks, we need to
   671  // find all the blocks that are needing this collection, and then
   672  // check if any of these block becomes executable and execute it if
   673  // is.
   674  func (e *Engine) handleCollection(
   675  	originID flow.Identifier,
   676  	collection *flow.Collection,
   677  ) error {
   678  	collID := collection.ID()
   679  
   680  	span, _ := e.tracer.StartCollectionSpan(context.Background(), collID, trace.EXEHandleCollection)
   681  	defer span.End()
   682  
   683  	lg := e.log.With().Hex("collection_id", collID[:]).Logger()
   684  
   685  	lg.Info().Hex("sender", originID[:]).Int("len", collection.Len()).Msg("handle collection")
   686  	defer func(startTime time.Time) {
   687  		lg.Info().TimeDiff("duration", time.Now(), startTime).Msg("collection handled")
   688  	}(time.Now())
   689  
   690  	// TODO: bail if have seen this collection before.
   691  	err := e.collections.Store(collection)
   692  	if err != nil {
   693  		return fmt.Errorf("cannot store collection: %w", err)
   694  	}
   695  
   696  	return e.mempool.BlockByCollection.Run(
   697  		func(backdata *stdmap.BlockByCollectionBackdata) error {
   698  			return e.addCollectionToMempool(collection, backdata)
   699  		},
   700  	)
   701  }
   702  
   703  func (e *Engine) addCollectionToMempool(
   704  	collection *flow.Collection,
   705  	backdata *stdmap.BlockByCollectionBackdata,
   706  ) error {
   707  	collID := collection.ID()
   708  	blockByCollectionID, exists := backdata.ByID(collID)
   709  
   710  	// if we don't find any block for this collection, then
   711  	// means we don't need this collection any more.
   712  	// or it was ejected from the mempool when it was full.
   713  	// either way, we will return
   714  	if !exists {
   715  		return nil
   716  	}
   717  
   718  	for _, executableBlock := range blockByCollectionID.ExecutableBlocks {
   719  		blockID := executableBlock.ID()
   720  
   721  		completeCollection, ok := executableBlock.CompleteCollections[collID]
   722  		if !ok {
   723  			return fmt.Errorf("cannot handle collection: internal inconsistency - collection pointing to block %v which does not contain said collection",
   724  				blockID)
   725  		}
   726  
   727  		// record collection max height metrics
   728  		blockHeight := executableBlock.Block.Header.Height
   729  		if blockHeight > e.maxCollectionHeight {
   730  			e.metrics.UpdateCollectionMaxHeight(blockHeight)
   731  			e.maxCollectionHeight = blockHeight
   732  		}
   733  
   734  		if completeCollection.IsCompleted() {
   735  			// already received transactions for this collection
   736  			continue
   737  		}
   738  
   739  		// update the transactions of the collection
   740  		// Note: it's guaranteed the transactions are for this collection, because
   741  		// the collection id matches with the CollectionID from the collection guarantee
   742  		completeCollection.Transactions = collection.Transactions
   743  
   744  		// check if the block becomes executable
   745  		_ = e.executeBlockIfComplete(executableBlock)
   746  	}
   747  
   748  	// since we've received this collection, remove it from the index
   749  	// this also prevents from executing the same block twice, because the second
   750  	// time when the collection arrives, it will not be found in the blockByCollectionID
   751  	// index.
   752  	backdata.Remove(collID)
   753  
   754  	return nil
   755  }
   756  
   757  func newQueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) (
   758  	*queue.Queue,
   759  	bool,
   760  ) {
   761  	q := queue.NewQueue(blockify)
   762  	qID := q.ID()
   763  	return q, queues.Add(qID, q)
   764  }
   765  
   766  // enqueue adds a block to the queues, return the queue that includes the block and booleans
   767  // * is block new one (it's not already enqueued, not a duplicate)
   768  // * is head of the queue (new queue has been created)
   769  //
   770  // Queues are chained blocks. Since a block can't be executable until its parent has been
   771  // executed, the chained structure allows us to only check the head of each queue to see if
   772  // any block becomes executable.
   773  // for instance we have one queue whose head is A:
   774  //
   775  //	A <- B <- C
   776  //	  ^- D <- E
   777  //
   778  // If we receive E <- F, then we will add it to the queue:
   779  //
   780  //	A <- B <- C
   781  //	  ^- D <- E <- F
   782  //
   783  // Even through there are 6 blocks, we only need to check if block A becomes executable.
   784  // when the parent block isn't in the queue, we add it as a new queue. for instance, if
   785  // we receive H <- G, then the queues will become:
   786  //
   787  //	A <- B <- C
   788  //	  ^- D <- E
   789  //	G
   790  func enqueue(blockify queue.Blockify, queues *stdmap.QueuesBackdata) (
   791  	*queue.Queue,
   792  	bool,
   793  	bool,
   794  ) {
   795  	for _, queue := range queues.All() {
   796  		if stored, isNew := queue.TryAdd(blockify); stored {
   797  			return queue, isNew, false
   798  		}
   799  	}
   800  	queue, isNew := newQueue(blockify, queues)
   801  	return queue, isNew, true
   802  }
   803  
   804  // check if the block's collections have been received,
   805  // if yes, add the collection to the executable block
   806  // if no, fetch the collection.
   807  // if a block has 3 collection, it would be 3 reqs to fetch them.
   808  // mark the collection belongs to the block,
   809  // mark the block contains this collection.
   810  // It returns the missing collections to be fetched
   811  // TODO: to rename
   812  func (e *Engine) matchAndFindMissingCollections(
   813  	executableBlock *entity.ExecutableBlock,
   814  	collectionsBackdata *stdmap.BlockByCollectionBackdata,
   815  ) ([]*flow.CollectionGuarantee, error) {
   816  	missingCollections := make([]*flow.CollectionGuarantee, 0, len(executableBlock.Block.Payload.Guarantees))
   817  
   818  	for _, guarantee := range executableBlock.Block.Payload.Guarantees {
   819  		coll := &entity.CompleteCollection{
   820  			Guarantee: guarantee,
   821  		}
   822  		executableBlock.CompleteCollections[guarantee.ID()] = coll
   823  
   824  		// check if we have requested this collection before.
   825  		// blocksNeedingCollection stores all the blocks that contain this collection
   826  
   827  		if blocksNeedingCollection, exists := collectionsBackdata.ByID(guarantee.ID()); exists {
   828  			// if we've requested this collection, it means other block might also contain this collection.
   829  			// in this case, add this block to the map so that when the collection is received,
   830  			// we could update the executable block
   831  			blocksNeedingCollection.ExecutableBlocks[executableBlock.ID()] = executableBlock
   832  
   833  			// since the collection is still being requested, we don't have the transactions
   834  			// yet, so exit
   835  			continue
   836  		}
   837  
   838  		// the storage doesn't have this collection, meaning this is our first time seeing this
   839  		// collection guarantee, create an entry to store in collectionsBackdata in order to
   840  		// update the executable blocks when the collection is received.
   841  		blocksNeedingCollection := &entity.BlocksByCollection{
   842  			CollectionID:     guarantee.ID(),
   843  			ExecutableBlocks: map[flow.Identifier]*entity.ExecutableBlock{executableBlock.ID(): executableBlock},
   844  		}
   845  
   846  		added := collectionsBackdata.Add(blocksNeedingCollection.ID(), blocksNeedingCollection)
   847  		if !added {
   848  			// sanity check, should not happen, unless mempool implementation has a bug
   849  			return nil, fmt.Errorf("collection already mapped to block")
   850  		}
   851  
   852  		missingCollections = append(missingCollections, guarantee)
   853  	}
   854  
   855  	return missingCollections, nil
   856  }
   857  
   858  // save the execution result of a block
   859  func (e *Engine) saveExecutionResults(
   860  	ctx context.Context,
   861  	result *execution.ComputationResult,
   862  ) error {
   863  	span, childCtx := e.tracer.StartSpanFromContext(ctx, trace.EXESaveExecutionResults)
   864  	defer span.End()
   865  
   866  	e.log.Debug().
   867  		Hex("block_id", logging.Entity(result.ExecutableBlock)).
   868  		Msg("received computation result")
   869  
   870  	for _, event := range result.ExecutionResult.ServiceEvents {
   871  		e.log.Info().
   872  			Uint64("block_height", result.ExecutableBlock.Height()).
   873  			Hex("block_id", logging.Entity(result.ExecutableBlock)).
   874  			Str("event_type", event.Type.String()).
   875  			Msg("service event emitted")
   876  	}
   877  
   878  	err := e.execState.SaveExecutionResults(childCtx, result)
   879  	if err != nil {
   880  		return fmt.Errorf("cannot persist execution state: %w", err)
   881  	}
   882  
   883  	finalEndState := result.CurrentEndState()
   884  	e.log.Debug().
   885  		Hex("block_id", logging.Entity(result.ExecutableBlock)).
   886  		Hex("start_state", result.ExecutableBlock.StartState[:]).
   887  		Hex("final_state", finalEndState[:]).
   888  		Msg("saved computation results")
   889  
   890  	return nil
   891  }
   892  
   893  // logExecutableBlock logs all data about an executable block
   894  // over time we should skip this
   895  func (e *Engine) logExecutableBlock(eb *entity.ExecutableBlock) {
   896  	// log block
   897  	e.log.Debug().
   898  		Hex("block_id", logging.Entity(eb)).
   899  		Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)).
   900  		Uint64("block_height", eb.Block.Header.Height).
   901  		Int("number_of_collections", len(eb.Collections())).
   902  		RawJSON("block_header", logging.AsJSON(eb.Block.Header)).
   903  		Msg("extensive log: block header")
   904  
   905  	// logs transactions
   906  	for i, col := range eb.Collections() {
   907  		for j, tx := range col.Transactions {
   908  			e.log.Debug().
   909  				Hex("block_id", logging.Entity(eb)).
   910  				Int("block_height", int(eb.Block.Header.Height)).
   911  				Hex("prev_block_id", logging.ID(eb.Block.Header.ParentID)).
   912  				Int("collection_index", i).
   913  				Int("tx_index", j).
   914  				Hex("collection_id", logging.ID(col.Guarantee.CollectionID)).
   915  				Hex("tx_hash", logging.Entity(tx)).
   916  				Hex("start_state_commitment", eb.StartState[:]).
   917  				RawJSON("transaction", logging.AsJSON(tx)).
   918  				Msg("extensive log: executed tx content")
   919  		}
   920  	}
   921  }
   922  
   923  // addOrFetch checks if there are stored collections for the given guarantees, if there is,
   924  // forward them to mempool to process the collection, otherwise fetch the collections.
   925  // any error returned are exception
   926  func (e *Engine) addOrFetch(
   927  	blockID flow.Identifier,
   928  	height uint64,
   929  	guarantees []*flow.CollectionGuarantee,
   930  ) error {
   931  	return e.fetchAndHandleCollection(blockID, height, guarantees, func(collection *flow.Collection) error {
   932  		err := e.mempool.BlockByCollection.Run(
   933  			func(backdata *stdmap.BlockByCollectionBackdata) error {
   934  				return e.addCollectionToMempool(collection, backdata)
   935  			})
   936  
   937  		if err != nil {
   938  			return fmt.Errorf("could not add collection to mempool: %w", err)
   939  		}
   940  		return nil
   941  	})
   942  }
   943  
   944  // addOrFetch checks if there are stored collections for the given guarantees, if there is,
   945  // forward them to the handler to process the collection, otherwise fetch the collections.
   946  // any error returned are exception
   947  func (e *Engine) fetchAndHandleCollection(
   948  	blockID flow.Identifier,
   949  	height uint64,
   950  	guarantees []*flow.CollectionGuarantee,
   951  	handleCollection func(*flow.Collection) error,
   952  ) error {
   953  	fetched := false
   954  	for _, guarantee := range guarantees {
   955  		// if we've requested this collection, we will store it in the storage,
   956  		// so check the storage to see whether we've seen it.
   957  		collection, err := e.collections.ByID(guarantee.CollectionID)
   958  
   959  		if err == nil {
   960  			// we found the collection from storage, forward this collection to handler
   961  			err = handleCollection(collection)
   962  			if err != nil {
   963  				return fmt.Errorf("could not handle collection: %w", err)
   964  			}
   965  
   966  			continue
   967  		}
   968  
   969  		// check if there was exception
   970  		if !errors.Is(err, storage.ErrNotFound) {
   971  			return fmt.Errorf("error while querying for collection: %w", err)
   972  		}
   973  
   974  		err = e.collectionFetcher.FetchCollection(blockID, height, guarantee)
   975  		if err != nil {
   976  			return fmt.Errorf("could not fetch collection: %w", err)
   977  		}
   978  		fetched = true
   979  	}
   980  
   981  	// make sure that the requests are dispatched immediately by the requester
   982  	if fetched {
   983  		e.collectionFetcher.Force()
   984  		e.metrics.ExecutionCollectionRequestSent()
   985  	}
   986  
   987  	return nil
   988  }
   989  
   990  func logQueueState(log zerolog.Logger, queues *stdmap.QueuesBackdata, blockID flow.Identifier) {
   991  	all := queues.All()
   992  
   993  	log.With().Hex("queue_state__executed_block_id", blockID[:]).Int("count", len(all)).Logger()
   994  	for i, queue := range all {
   995  		log.Error().Msgf("%v-th queue state: %v", i, queue.String())
   996  	}
   997  }