github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/access/ingestion/engine.go (about)

     1  package ingestion
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    12  	"github.com/onflow/flow-go/engine"
    13  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    14  	"github.com/onflow/flow-go/model/flow"
    15  	"github.com/onflow/flow-go/model/flow/filter"
    16  	"github.com/onflow/flow-go/module"
    17  	"github.com/onflow/flow-go/module/component"
    18  	"github.com/onflow/flow-go/module/counters"
    19  	"github.com/onflow/flow-go/module/irrecoverable"
    20  	"github.com/onflow/flow-go/module/jobqueue"
    21  	"github.com/onflow/flow-go/module/state_synchronization/indexer"
    22  	"github.com/onflow/flow-go/module/util"
    23  	"github.com/onflow/flow-go/network"
    24  	"github.com/onflow/flow-go/network/channels"
    25  	"github.com/onflow/flow-go/state/protocol"
    26  	"github.com/onflow/flow-go/storage"
    27  )
    28  
    29  const (
    30  	// time to wait for the all the missing collections to be received at node startup
    31  	collectionCatchupTimeout = 30 * time.Second
    32  
    33  	// time to poll the storage to check if missing collections have been received
    34  	collectionCatchupDBPollInterval = 10 * time.Millisecond
    35  
    36  	// time to update the FullBlockHeight index
    37  	fullBlockRefreshInterval = 1 * time.Second
    38  
    39  	// time to request missing collections from the network
    40  	missingCollsRequestInterval = 1 * time.Minute
    41  
    42  	// a threshold of number of blocks with missing collections beyond which collections should be re-requested
    43  	// this is to prevent spamming the collection nodes with request
    44  	missingCollsForBlkThreshold = 100
    45  
    46  	// a threshold of block height beyond which collections should be re-requested (regardless of the number of blocks for which collection are missing)
    47  	// this is to ensure that if a collection is missing for a long time (in terms of block height) it is eventually re-requested
    48  	missingCollsForAgeThreshold = 100
    49  
    50  	// default queue capacity
    51  	defaultQueueCapacity = 10_000
    52  
    53  	// how many workers will concurrently process the tasks in the jobqueue
    54  	workersCount = 1
    55  
    56  	// ensure blocks are processed sequentially by jobqueue
    57  	searchAhead = 1
    58  )
    59  
    60  var (
    61  	defaultCollectionCatchupTimeout               = collectionCatchupTimeout
    62  	defaultCollectionCatchupDBPollInterval        = collectionCatchupDBPollInterval
    63  	defaultFullBlockRefreshInterval               = fullBlockRefreshInterval
    64  	defaultMissingCollsRequestInterval            = missingCollsRequestInterval
    65  	defaultMissingCollsForBlkThreshold            = missingCollsForBlkThreshold
    66  	defaultMissingCollsForAgeThreshold     uint64 = missingCollsForAgeThreshold
    67  )
    68  
    69  // Engine represents the ingestion engine, used to funnel data from other nodes
    70  // to a centralized location that can be queried by a user
    71  //
    72  // No errors are expected during normal operation.
    73  type Engine struct {
    74  	*component.ComponentManager
    75  	messageHandler            *engine.MessageHandler
    76  	executionReceiptsNotifier engine.Notifier
    77  	executionReceiptsQueue    engine.MessageStore
    78  	// Job queue
    79  	finalizedBlockConsumer *jobqueue.ComponentConsumer
    80  
    81  	// Notifier for queue consumer
    82  	finalizedBlockNotifier engine.Notifier
    83  
    84  	log     zerolog.Logger   // used to log relevant actions with context
    85  	state   protocol.State   // used to access the  protocol state
    86  	me      module.Local     // used to access local node information
    87  	request module.Requester // used to request collections
    88  
    89  	// storage
    90  	// FIX: remove direct DB access by substituting indexer module
    91  	blocks            storage.Blocks
    92  	headers           storage.Headers
    93  	collections       storage.Collections
    94  	transactions      storage.Transactions
    95  	executionReceipts storage.ExecutionReceipts
    96  	maxReceiptHeight  uint64
    97  	executionResults  storage.ExecutionResults
    98  
    99  	lastFullBlockHeight *counters.PersistentStrictMonotonicCounter
   100  	// metrics
   101  	collectionExecutedMetric module.CollectionExecutedMetric
   102  }
   103  
   104  var _ network.MessageProcessor = (*Engine)(nil)
   105  
   106  // New creates a new access ingestion engine
   107  //
   108  // No errors are expected during normal operation.
   109  func New(
   110  	log zerolog.Logger,
   111  	net network.EngineRegistry,
   112  	state protocol.State,
   113  	me module.Local,
   114  	request module.Requester,
   115  	blocks storage.Blocks,
   116  	headers storage.Headers,
   117  	collections storage.Collections,
   118  	transactions storage.Transactions,
   119  	executionResults storage.ExecutionResults,
   120  	executionReceipts storage.ExecutionReceipts,
   121  	collectionExecutedMetric module.CollectionExecutedMetric,
   122  	processedHeight storage.ConsumerProgress,
   123  	lastFullBlockHeight *counters.PersistentStrictMonotonicCounter,
   124  ) (*Engine, error) {
   125  	executionReceiptsRawQueue, err := fifoqueue.NewFifoQueue(defaultQueueCapacity)
   126  	if err != nil {
   127  		return nil, fmt.Errorf("could not create execution receipts queue: %w", err)
   128  	}
   129  
   130  	executionReceiptsQueue := &engine.FifoMessageStore{FifoQueue: executionReceiptsRawQueue}
   131  
   132  	messageHandler := engine.NewMessageHandler(
   133  		log,
   134  		engine.NewNotifier(),
   135  		engine.Pattern{
   136  			Match: func(msg *engine.Message) bool {
   137  				_, ok := msg.Payload.(*flow.ExecutionReceipt)
   138  				return ok
   139  			},
   140  			Store: executionReceiptsQueue,
   141  		},
   142  	)
   143  
   144  	collectionExecutedMetric.UpdateLastFullBlockHeight(lastFullBlockHeight.Value())
   145  
   146  	// initialize the propagation engine with its dependencies
   147  	e := &Engine{
   148  		log:                      log.With().Str("engine", "ingestion").Logger(),
   149  		state:                    state,
   150  		me:                       me,
   151  		request:                  request,
   152  		blocks:                   blocks,
   153  		headers:                  headers,
   154  		collections:              collections,
   155  		transactions:             transactions,
   156  		executionResults:         executionResults,
   157  		executionReceipts:        executionReceipts,
   158  		maxReceiptHeight:         0,
   159  		collectionExecutedMetric: collectionExecutedMetric,
   160  		finalizedBlockNotifier:   engine.NewNotifier(),
   161  		lastFullBlockHeight:      lastFullBlockHeight,
   162  
   163  		// queue / notifier for execution receipts
   164  		executionReceiptsNotifier: engine.NewNotifier(),
   165  		executionReceiptsQueue:    executionReceiptsQueue,
   166  		messageHandler:            messageHandler,
   167  	}
   168  
   169  	// jobqueue Jobs object that tracks finalized blocks by height. This is used by the finalizedBlockConsumer
   170  	// to get a sequential list of finalized blocks.
   171  	finalizedBlockReader := jobqueue.NewFinalizedBlockReader(state, blocks)
   172  
   173  	defaultIndex, err := e.defaultProcessedIndex()
   174  	if err != nil {
   175  		return nil, fmt.Errorf("could not read default processed index: %w", err)
   176  	}
   177  
   178  	// create a jobqueue that will process new available finalized block. The `finalizedBlockNotifier` is used to
   179  	// signal new work, which is being triggered on the `processFinalizedBlockJob` handler.
   180  	e.finalizedBlockConsumer, err = jobqueue.NewComponentConsumer(
   181  		e.log.With().Str("module", "ingestion_block_consumer").Logger(),
   182  		e.finalizedBlockNotifier.Channel(),
   183  		processedHeight,
   184  		finalizedBlockReader,
   185  		defaultIndex,
   186  		e.processFinalizedBlockJob,
   187  		workersCount,
   188  		searchAhead,
   189  	)
   190  	if err != nil {
   191  		return nil, fmt.Errorf("error creating finalizedBlock jobqueue: %w", err)
   192  	}
   193  
   194  	// Add workers
   195  	e.ComponentManager = component.NewComponentManagerBuilder().
   196  		AddWorker(e.processBackground).
   197  		AddWorker(e.processExecutionReceipts).
   198  		AddWorker(e.runFinalizedBlockConsumer).
   199  		Build()
   200  
   201  	// register engine with the execution receipt provider
   202  	_, err = net.Register(channels.ReceiveReceipts, e)
   203  	if err != nil {
   204  		return nil, fmt.Errorf("could not register for results: %w", err)
   205  	}
   206  
   207  	return e, nil
   208  }
   209  
   210  // defaultProcessedIndex returns the last finalized block height from the protocol state.
   211  //
   212  // The BlockConsumer utilizes this return height to fetch and consume block jobs from
   213  // jobs queue the first time it initializes.
   214  //
   215  // No errors are expected during normal operation.
   216  func (e *Engine) defaultProcessedIndex() (uint64, error) {
   217  	final, err := e.state.Final().Head()
   218  	if err != nil {
   219  		return 0, fmt.Errorf("could not get finalized height: %w", err)
   220  	}
   221  	return final.Height, nil
   222  }
   223  
   224  // runFinalizedBlockConsumer runs the finalizedBlockConsumer component
   225  func (e *Engine) runFinalizedBlockConsumer(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   226  	e.finalizedBlockConsumer.Start(ctx)
   227  
   228  	err := util.WaitClosed(ctx, e.finalizedBlockConsumer.Ready())
   229  	if err == nil {
   230  		ready()
   231  	}
   232  
   233  	<-e.finalizedBlockConsumer.Done()
   234  }
   235  
   236  // processFinalizedBlockJob is a handler function for processing finalized block jobs.
   237  // It converts the job to a block, processes the block, and logs any errors encountered during processing.
   238  func (e *Engine) processFinalizedBlockJob(ctx irrecoverable.SignalerContext, job module.Job, done func()) {
   239  	block, err := jobqueue.JobToBlock(job)
   240  	if err != nil {
   241  		ctx.Throw(fmt.Errorf("failed to convert job to block: %w", err))
   242  	}
   243  
   244  	err = e.processFinalizedBlock(block)
   245  	if err == nil {
   246  		done()
   247  		return
   248  	}
   249  
   250  	e.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error during finalized block processing job")
   251  }
   252  
   253  // processBackground is a background routine responsible for executing periodic tasks related to block processing and collection retrieval.
   254  // It performs tasks such as updating indexes of processed blocks and requesting missing collections from the network.
   255  // This function runs indefinitely until the context is canceled.
   256  // Periodically, it checks for updates in the last fully processed block index and requests missing collections if necessary.
   257  // Additionally, it checks for missing collections across a range of blocks and requests them if certain thresholds are met.
   258  func (e *Engine) processBackground(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   259  	// context with timeout
   260  	requestCtx, cancel := context.WithTimeout(ctx, defaultCollectionCatchupTimeout)
   261  	defer cancel()
   262  
   263  	// request missing collections
   264  	err := e.requestMissingCollections(requestCtx)
   265  	if err != nil {
   266  		e.log.Error().Err(err).Msg("requesting missing collections failed")
   267  	}
   268  	ready()
   269  
   270  	updateTicker := time.NewTicker(defaultFullBlockRefreshInterval)
   271  	defer updateTicker.Stop()
   272  
   273  	requestTicker := time.NewTicker(defaultMissingCollsRequestInterval)
   274  	defer requestTicker.Stop()
   275  
   276  	for {
   277  		select {
   278  		case <-ctx.Done():
   279  			return
   280  
   281  		// refresh the LastFullBlockReceived index
   282  		case <-updateTicker.C:
   283  			err := e.updateLastFullBlockReceivedIndex()
   284  			if err != nil {
   285  				ctx.Throw(err)
   286  			}
   287  
   288  		// request missing collections from the network
   289  		case <-requestTicker.C:
   290  			err := e.checkMissingCollections()
   291  			if err != nil {
   292  				ctx.Throw(err)
   293  			}
   294  		}
   295  	}
   296  }
   297  
   298  // processExecutionReceipts is responsible for processing the execution receipts.
   299  // It listens for incoming execution receipts and processes them asynchronously.
   300  func (e *Engine) processExecutionReceipts(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   301  	ready()
   302  	notifier := e.executionReceiptsNotifier.Channel()
   303  
   304  	for {
   305  		select {
   306  		case <-ctx.Done():
   307  			return
   308  		case <-notifier:
   309  			err := e.processAvailableExecutionReceipts(ctx)
   310  			if err != nil {
   311  				// if an error reaches this point, it is unexpected
   312  				ctx.Throw(err)
   313  				return
   314  			}
   315  		}
   316  	}
   317  }
   318  
   319  // processAvailableExecutionReceipts processes available execution receipts in the queue and handles it.
   320  // It continues processing until the context is canceled.
   321  //
   322  // No errors are expected during normal operation.
   323  func (e *Engine) processAvailableExecutionReceipts(ctx context.Context) error {
   324  	for {
   325  		select {
   326  		case <-ctx.Done():
   327  			return nil
   328  		default:
   329  		}
   330  		msg, ok := e.executionReceiptsQueue.Get()
   331  		if !ok {
   332  			return nil
   333  		}
   334  
   335  		receipt := msg.Payload.(*flow.ExecutionReceipt)
   336  
   337  		if err := e.handleExecutionReceipt(msg.OriginID, receipt); err != nil {
   338  			return err
   339  		}
   340  	}
   341  }
   342  
   343  // process processes the given ingestion engine event. Events that are given
   344  // to this function originate within the expulsion engine on the node with the
   345  // given origin ID.
   346  func (e *Engine) process(originID flow.Identifier, event interface{}) error {
   347  	select {
   348  	case <-e.ComponentManager.ShutdownSignal():
   349  		return component.ErrComponentShutdown
   350  	default:
   351  	}
   352  
   353  	switch event.(type) {
   354  	case *flow.ExecutionReceipt:
   355  		err := e.messageHandler.Process(originID, event)
   356  		e.executionReceiptsNotifier.Notify()
   357  		return err
   358  	default:
   359  		return fmt.Errorf("invalid event type (%T)", event)
   360  	}
   361  }
   362  
   363  // Process processes the given event from the node with the given origin ID in
   364  // a blocking manner. It returns the potential processing error when done.
   365  func (e *Engine) Process(_ channels.Channel, originID flow.Identifier, event interface{}) error {
   366  	return e.process(originID, event)
   367  }
   368  
   369  // OnFinalizedBlock is called by the follower engine after a block has been finalized and the state has been updated.
   370  // Receives block finalized events from the finalization distributor and forwards them to the finalizedBlockConsumer.
   371  func (e *Engine) OnFinalizedBlock(*model.Block) {
   372  	e.finalizedBlockNotifier.Notify()
   373  }
   374  
   375  // processFinalizedBlock handles an incoming finalized block.
   376  // It processes the block, indexes it for further processing, and requests missing collections if necessary.
   377  //
   378  // Expected errors during normal operation:
   379  //   - storage.ErrNotFound - if last full block height does not exist in the database.
   380  //   - storage.ErrAlreadyExists - if the collection within block or an execution result ID already exists in the database.
   381  //   - generic error in case of unexpected failure from the database layer, or failure
   382  //     to decode an existing database value.
   383  func (e *Engine) processFinalizedBlock(block *flow.Block) error {
   384  	// FIX: we can't index guarantees here, as we might have more than one block
   385  	// with the same collection as long as it is not finalized
   386  
   387  	// TODO: substitute an indexer module as layer between engine and storage
   388  
   389  	// index the block storage with each of the collection guarantee
   390  	err := e.blocks.IndexBlockForCollections(block.Header.ID(), flow.GetIDs(block.Payload.Guarantees))
   391  	if err != nil {
   392  		return fmt.Errorf("could not index block for collections: %w", err)
   393  	}
   394  
   395  	// loop through seals and index ID -> result ID
   396  	for _, seal := range block.Payload.Seals {
   397  		err := e.executionResults.Index(seal.BlockID, seal.ResultID)
   398  		if err != nil {
   399  			return fmt.Errorf("could not index block for execution result: %w", err)
   400  		}
   401  	}
   402  
   403  	// skip requesting collections, if this block is below the last full block height
   404  	// this means that either we have already received these collections, or the block
   405  	// may contain unverifiable guarantees (in case this node has just joined the network)
   406  	lastFullBlockHeight := e.lastFullBlockHeight.Value()
   407  	if block.Header.Height <= lastFullBlockHeight {
   408  		e.log.Info().Msgf("skipping requesting collections for finalized block below last full block height (%d<=%d)", block.Header.Height, lastFullBlockHeight)
   409  		return nil
   410  	}
   411  
   412  	// queue requesting each of the collections from the collection node
   413  	e.requestCollectionsInFinalizedBlock(block.Payload.Guarantees)
   414  
   415  	e.collectionExecutedMetric.BlockFinalized(block)
   416  
   417  	return nil
   418  }
   419  
   420  // handleExecutionReceipt persists the execution receipt locally.
   421  // Storing the execution receipt and updates the collection executed metric.
   422  //
   423  // No errors are expected during normal operation.
   424  func (e *Engine) handleExecutionReceipt(_ flow.Identifier, r *flow.ExecutionReceipt) error {
   425  	// persist the execution receipt locally, storing will also index the receipt
   426  	err := e.executionReceipts.Store(r)
   427  	if err != nil {
   428  		return fmt.Errorf("failed to store execution receipt: %w", err)
   429  	}
   430  
   431  	e.collectionExecutedMetric.ExecutionReceiptReceived(r)
   432  	return nil
   433  }
   434  
   435  // OnCollection handles the response of the collection request made earlier when a block was received.
   436  // No errors expected during normal operations.
   437  func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) {
   438  	collection, ok := entity.(*flow.Collection)
   439  	if !ok {
   440  		e.log.Error().Msgf("invalid entity type (%T)", entity)
   441  		return
   442  	}
   443  
   444  	err := indexer.HandleCollection(collection, e.collections, e.transactions, e.log, e.collectionExecutedMetric)
   445  	if err != nil {
   446  		e.log.Error().Err(err).Msg("could not handle collection")
   447  		return
   448  	}
   449  }
   450  
   451  // requestMissingCollections requests missing collections for all blocks in the local db storage once at startup
   452  func (e *Engine) requestMissingCollections(ctx context.Context) error {
   453  	var startHeight, endHeight uint64
   454  
   455  	// get the height of the last block for which all collections were received
   456  	lastFullHeight := e.lastFullBlockHeight.Value()
   457  	// start from the next block
   458  	startHeight = lastFullHeight + 1
   459  
   460  	// end at the finalized block
   461  	finalBlk, err := e.state.Final().Head()
   462  	if err != nil {
   463  		return err
   464  	}
   465  	endHeight = finalBlk.Height
   466  
   467  	e.log.Info().
   468  		Uint64("start_height", startHeight).
   469  		Uint64("end_height", endHeight).
   470  		Msg("starting collection catchup")
   471  
   472  	// collect all missing collection ids in a map
   473  	var missingCollMap = make(map[flow.Identifier]struct{})
   474  
   475  	// iterate through the complete chain and request the missing collections
   476  	for i := startHeight; i <= endHeight; i++ {
   477  
   478  		// if deadline exceeded or someone cancelled the context
   479  		if ctx.Err() != nil {
   480  			return fmt.Errorf("failed to complete requests for missing collections: %w", ctx.Err())
   481  		}
   482  
   483  		missingColls, err := e.missingCollectionsAtHeight(i)
   484  		if err != nil {
   485  			return fmt.Errorf("failed to retrieve missing collections by height %d during collection catchup: %w", i, err)
   486  		}
   487  
   488  		// request the missing collections
   489  		e.requestCollectionsInFinalizedBlock(missingColls)
   490  
   491  		// add them to the missing collection id map to track later
   492  		for _, cg := range missingColls {
   493  			missingCollMap[cg.CollectionID] = struct{}{}
   494  		}
   495  	}
   496  
   497  	// if no collections were found to be missing we are done.
   498  	if len(missingCollMap) == 0 {
   499  		// nothing more to do
   500  		e.log.Info().Msg("no missing collections found")
   501  		return nil
   502  	}
   503  
   504  	// the collection catchup needs to happen ASAP when the node starts up. Hence, force the requester to dispatch all request
   505  	e.request.Force()
   506  
   507  	// track progress of retrieving all the missing collections by polling the db periodically
   508  	ticker := time.NewTicker(defaultCollectionCatchupDBPollInterval)
   509  	defer ticker.Stop()
   510  
   511  	// while there are still missing collections, keep polling
   512  	for len(missingCollMap) > 0 {
   513  		select {
   514  		case <-ctx.Done():
   515  			// context may have expired
   516  			return fmt.Errorf("failed to complete collection retreival: %w", ctx.Err())
   517  		case <-ticker.C:
   518  
   519  			// log progress
   520  			e.log.Info().
   521  				Int("total_missing_collections", len(missingCollMap)).
   522  				Msg("retrieving missing collections...")
   523  
   524  			var foundColls []flow.Identifier
   525  			// query db to find if collections are still missing
   526  			for collID := range missingCollMap {
   527  				found, err := e.haveCollection(collID)
   528  				if err != nil {
   529  					return err
   530  				}
   531  				// if collection found in local db, remove it from missingColls later
   532  				if found {
   533  					foundColls = append(foundColls, collID)
   534  				}
   535  			}
   536  
   537  			// update the missingColls list by removing collections that have now been received
   538  			for _, c := range foundColls {
   539  				delete(missingCollMap, c)
   540  			}
   541  		}
   542  	}
   543  
   544  	e.log.Info().Msg("collection catchup done")
   545  	return nil
   546  }
   547  
   548  // updateLastFullBlockReceivedIndex finds the next highest height where all previous collections
   549  // have been indexed, and updates the LastFullBlockReceived index to that height
   550  func (e *Engine) updateLastFullBlockReceivedIndex() error {
   551  	lastFullHeight := e.lastFullBlockHeight.Value()
   552  
   553  	finalBlk, err := e.state.Final().Head()
   554  	if err != nil {
   555  		return fmt.Errorf("failed to get finalized block: %w", err)
   556  	}
   557  	finalizedHeight := finalBlk.Height
   558  
   559  	// track the latest contiguous full height
   560  	newLastFullHeight, err := e.lowestHeightWithMissingCollection(lastFullHeight, finalizedHeight)
   561  	if err != nil {
   562  		return fmt.Errorf("failed to find last full block received height: %w", err)
   563  	}
   564  
   565  	// if more contiguous blocks are now complete, update db
   566  	if newLastFullHeight > lastFullHeight {
   567  		err := e.lastFullBlockHeight.Set(newLastFullHeight)
   568  		if err != nil {
   569  			return fmt.Errorf("failed to update last full block height: %w", err)
   570  		}
   571  
   572  		e.collectionExecutedMetric.UpdateLastFullBlockHeight(newLastFullHeight)
   573  
   574  		e.log.Debug().
   575  			Uint64("last_full_block_height", newLastFullHeight).
   576  			Msg("updated LastFullBlockReceived index")
   577  	}
   578  
   579  	return nil
   580  }
   581  
   582  // lowestHeightWithMissingCollection returns the lowest height that is missing collections
   583  func (e *Engine) lowestHeightWithMissingCollection(lastFullHeight, finalizedHeight uint64) (uint64, error) {
   584  	newLastFullHeight := lastFullHeight
   585  
   586  	for i := lastFullHeight + 1; i <= finalizedHeight; i++ {
   587  		missingColls, err := e.missingCollectionsAtHeight(i)
   588  		if err != nil {
   589  			return 0, err
   590  		}
   591  
   592  		// return when we find the first block with missing collections
   593  		if len(missingColls) > 0 {
   594  			return newLastFullHeight, nil
   595  		}
   596  
   597  		newLastFullHeight = i
   598  	}
   599  
   600  	return newLastFullHeight, nil
   601  }
   602  
   603  // checkMissingCollections requests missing collections if the number of blocks missing collections
   604  // have reached the defaultMissingCollsForBlkThreshold value.
   605  func (e *Engine) checkMissingCollections() error {
   606  	lastFullHeight := e.lastFullBlockHeight.Value()
   607  
   608  	finalBlk, err := e.state.Final().Head()
   609  	if err != nil {
   610  		return fmt.Errorf("failed to get finalized block: %w", err)
   611  	}
   612  	finalizedHeight := finalBlk.Height
   613  
   614  	// number of blocks with missing collections
   615  	incompleteBlksCnt := 0
   616  
   617  	// collect all missing collections
   618  	var allMissingColls []*flow.CollectionGuarantee
   619  
   620  	// start from the next block till we either hit the finalized block or cross the max collection missing threshold
   621  	for i := lastFullHeight + 1; i <= finalizedHeight && incompleteBlksCnt < defaultMissingCollsForBlkThreshold; i++ {
   622  		missingColls, err := e.missingCollectionsAtHeight(i)
   623  		if err != nil {
   624  			return fmt.Errorf("failed to find missing collections at height %d: %w", i, err)
   625  		}
   626  
   627  		if len(missingColls) == 0 {
   628  			continue
   629  		}
   630  
   631  		incompleteBlksCnt++
   632  
   633  		allMissingColls = append(allMissingColls, missingColls...)
   634  	}
   635  
   636  	// additionally, if more than threshold blocks have missing collections OR collections are
   637  	// missing since defaultMissingCollsForAgeThreshold, re-request those collections
   638  	if incompleteBlksCnt >= defaultMissingCollsForBlkThreshold ||
   639  		(finalizedHeight-lastFullHeight) > defaultMissingCollsForAgeThreshold {
   640  		// warn log since this should generally not happen
   641  		e.log.Warn().
   642  			Uint64("finalized_height", finalizedHeight).
   643  			Uint64("last_full_blk_height", lastFullHeight).
   644  			Int("missing_collection_blk_count", incompleteBlksCnt).
   645  			Int("missing_collection_count", len(allMissingColls)).
   646  			Msg("re-requesting missing collections")
   647  		e.requestCollectionsInFinalizedBlock(allMissingColls)
   648  	}
   649  
   650  	return nil
   651  }
   652  
   653  // missingCollectionsAtHeight returns all missing collection guarantees at a given height
   654  func (e *Engine) missingCollectionsAtHeight(h uint64) ([]*flow.CollectionGuarantee, error) {
   655  	block, err := e.blocks.ByHeight(h)
   656  	if err != nil {
   657  		return nil, fmt.Errorf("failed to retrieve block by height %d: %w", h, err)
   658  	}
   659  
   660  	var missingColls []*flow.CollectionGuarantee
   661  	for _, guarantee := range block.Payload.Guarantees {
   662  		collID := guarantee.CollectionID
   663  		found, err := e.haveCollection(collID)
   664  		if err != nil {
   665  			return nil, err
   666  		}
   667  		if !found {
   668  			missingColls = append(missingColls, guarantee)
   669  		}
   670  	}
   671  	return missingColls, nil
   672  }
   673  
   674  // haveCollection looks up the collection from the collection db with collID
   675  func (e *Engine) haveCollection(collID flow.Identifier) (bool, error) {
   676  	_, err := e.collections.LightByID(collID)
   677  	if err == nil {
   678  		return true, nil
   679  	}
   680  	if errors.Is(err, storage.ErrNotFound) {
   681  		return false, nil
   682  	}
   683  	return false, fmt.Errorf("failed to retrieve collection %s: %w", collID.String(), err)
   684  }
   685  
   686  // requestCollectionsInFinalizedBlock registers collection requests with the requester engine
   687  func (e *Engine) requestCollectionsInFinalizedBlock(missingColls []*flow.CollectionGuarantee) {
   688  	for _, cg := range missingColls {
   689  		guarantors, err := protocol.FindGuarantors(e.state, cg)
   690  		if err != nil {
   691  			// failed to find guarantors for guarantees contained in a finalized block is fatal error
   692  			e.log.Fatal().Err(err).Msgf("could not find guarantors for guarantee %v", cg.ID())
   693  		}
   694  		e.request.EntityByID(cg.ID(), filter.HasNodeID[flow.Identity](guarantors...))
   695  	}
   696  }