github.com/onflow/flow-go@v0.33.17/engine/access/ingestion/engine.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package ingestion
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"time"
    10  
    11  	"github.com/rs/zerolog"
    12  
    13  	"github.com/onflow/flow-go/consensus/hotstuff/model"
    14  	"github.com/onflow/flow-go/engine"
    15  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    16  	"github.com/onflow/flow-go/model/flow"
    17  	"github.com/onflow/flow-go/model/flow/filter"
    18  	"github.com/onflow/flow-go/module"
    19  	"github.com/onflow/flow-go/module/component"
    20  	"github.com/onflow/flow-go/module/irrecoverable"
    21  	"github.com/onflow/flow-go/module/state_synchronization/indexer"
    22  	"github.com/onflow/flow-go/network"
    23  	"github.com/onflow/flow-go/network/channels"
    24  	"github.com/onflow/flow-go/state/protocol"
    25  	"github.com/onflow/flow-go/storage"
    26  )
    27  
const (
	// collectionCatchupTimeout is the maximum time to wait for all the missing
	// collections to be received at node startup.
	collectionCatchupTimeout = 30 * time.Second

	// collectionCatchupDBPollInterval is the interval at which storage is polled
	// to check whether missing collections have been received.
	collectionCatchupDBPollInterval = 10 * time.Millisecond

	// fullBlockRefreshInterval is the interval at which the FullBlockHeight index
	// is updated.
	fullBlockRefreshInterval = 1 * time.Second

	// missingCollsRequestInterval is the interval at which missing collections are
	// (re-)requested from the network.
	missingCollsRequestInterval = 1 * time.Minute

	// missingCollsForBlkThreshold is the number of blocks with missing collections
	// at or beyond which collections are re-requested.
	// This is to prevent spamming the collection nodes with requests.
	missingCollsForBlkThreshold = 100

	// missingCollsForAgeThreshold is the height difference between the finalized
	// block and the last full block beyond which collections are re-requested
	// (regardless of the number of blocks for which collections are missing).
	// This ensures that a collection that has been missing for a long time
	// (in terms of block height) is eventually re-requested.
	missingCollsForAgeThreshold = 100

	// defaultQueueCapacity is the capacity of each FIFO message queue created in New.
	defaultQueueCapacity = 10_000
)
    52  
// Package-level mirrors of the constants above. The engine reads these
// variables (not the constants) at runtime.
// NOTE(review): presumably declared as variables so that tests can override
// them — confirm against the test files.
// defaultMissingCollsForAgeThreshold is explicitly typed uint64 because it is
// compared against a difference of uint64 block heights in checkMissingCollections.
var (
	defaultCollectionCatchupTimeout               = collectionCatchupTimeout
	defaultCollectionCatchupDBPollInterval        = collectionCatchupDBPollInterval
	defaultFullBlockRefreshInterval               = fullBlockRefreshInterval
	defaultMissingCollsRequestInterval            = missingCollsRequestInterval
	defaultMissingCollsForBlkThreshold            = missingCollsForBlkThreshold
	defaultMissingCollsForAgeThreshold     uint64 = missingCollsForAgeThreshold
)
    61  
// Engine represents the ingestion engine, used to funnel data from other nodes
// to a centralized location that can be queried by a user.
//
// Incoming events (finalized blocks and execution receipts) are placed on
// per-type queues by the message handler and drained by dedicated workers that
// are registered with the embedded ComponentManager in New.
type Engine struct {
	// component lifecycle and inbound message plumbing: the message handler
	// stores each accepted event in the queue matching its payload type, and
	// the paired notifier wakes the worker that drains that queue
	*component.ComponentManager
	messageHandler            *engine.MessageHandler
	executionReceiptsNotifier engine.Notifier
	executionReceiptsQueue    engine.MessageStore
	finalizedBlockNotifier    engine.Notifier
	finalizedBlockQueue       engine.MessageStore

	log     zerolog.Logger   // used to log relevant actions with context
	state   protocol.State   // used to access the protocol state
	me      module.Local     // used to access local node information
	request module.Requester // used to request collections

	// storage
	// FIX: remove direct DB access by substituting indexer module
	// NOTE(review): headers and maxReceiptHeight are only assigned in New and
	// are not otherwise referenced in this file — confirm whether they are
	// still needed.
	blocks            storage.Blocks
	headers           storage.Headers
	collections       storage.Collections
	transactions      storage.Transactions
	executionReceipts storage.ExecutionReceipts
	maxReceiptHeight  uint64
	executionResults  storage.ExecutionResults

	// metrics
	collectionExecutedMetric module.CollectionExecutedMetric
}
    90  
    91  // New creates a new access ingestion engine
    92  func New(
    93  	log zerolog.Logger,
    94  	net network.EngineRegistry,
    95  	state protocol.State,
    96  	me module.Local,
    97  	request module.Requester,
    98  	blocks storage.Blocks,
    99  	headers storage.Headers,
   100  	collections storage.Collections,
   101  	transactions storage.Transactions,
   102  	executionResults storage.ExecutionResults,
   103  	executionReceipts storage.ExecutionReceipts,
   104  	collectionExecutedMetric module.CollectionExecutedMetric,
   105  ) (*Engine, error) {
   106  	executionReceiptsRawQueue, err := fifoqueue.NewFifoQueue(defaultQueueCapacity)
   107  	if err != nil {
   108  		return nil, fmt.Errorf("could not create execution receipts queue: %w", err)
   109  	}
   110  
   111  	executionReceiptsQueue := &engine.FifoMessageStore{FifoQueue: executionReceiptsRawQueue}
   112  
   113  	finalizedBlocksRawQueue, err := fifoqueue.NewFifoQueue(defaultQueueCapacity)
   114  	if err != nil {
   115  		return nil, fmt.Errorf("could not create finalized block queue: %w", err)
   116  	}
   117  
   118  	finalizedBlocksQueue := &engine.FifoMessageStore{FifoQueue: finalizedBlocksRawQueue}
   119  
   120  	messageHandler := engine.NewMessageHandler(
   121  		log,
   122  		engine.NewNotifier(),
   123  		engine.Pattern{
   124  			Match: func(msg *engine.Message) bool {
   125  				_, ok := msg.Payload.(*model.Block)
   126  				return ok
   127  			},
   128  			Store: finalizedBlocksQueue,
   129  		},
   130  		engine.Pattern{
   131  			Match: func(msg *engine.Message) bool {
   132  				_, ok := msg.Payload.(*flow.ExecutionReceipt)
   133  				return ok
   134  			},
   135  			Store: executionReceiptsQueue,
   136  		},
   137  	)
   138  
   139  	// initialize the propagation engine with its dependencies
   140  	e := &Engine{
   141  		log:                      log.With().Str("engine", "ingestion").Logger(),
   142  		state:                    state,
   143  		me:                       me,
   144  		request:                  request,
   145  		blocks:                   blocks,
   146  		headers:                  headers,
   147  		collections:              collections,
   148  		transactions:             transactions,
   149  		executionResults:         executionResults,
   150  		executionReceipts:        executionReceipts,
   151  		maxReceiptHeight:         0,
   152  		collectionExecutedMetric: collectionExecutedMetric,
   153  
   154  		// queue / notifier for execution receipts
   155  		executionReceiptsNotifier: engine.NewNotifier(),
   156  		executionReceiptsQueue:    executionReceiptsQueue,
   157  
   158  		// queue / notifier for finalized blocks
   159  		finalizedBlockNotifier: engine.NewNotifier(),
   160  		finalizedBlockQueue:    finalizedBlocksQueue,
   161  
   162  		messageHandler: messageHandler,
   163  	}
   164  
   165  	// Add workers
   166  	e.ComponentManager = component.NewComponentManagerBuilder().
   167  		AddWorker(e.processBackground).
   168  		AddWorker(e.processExecutionReceipts).
   169  		AddWorker(e.processFinalizedBlocks).
   170  		Build()
   171  
   172  	// register engine with the execution receipt provider
   173  	_, err = net.Register(channels.ReceiveReceipts, e)
   174  	if err != nil {
   175  		return nil, fmt.Errorf("could not register for results: %w", err)
   176  	}
   177  
   178  	return e, nil
   179  }
   180  
   181  func (e *Engine) Start(parent irrecoverable.SignalerContext) {
   182  	err := e.initLastFullBlockHeightIndex()
   183  	if err != nil {
   184  		parent.Throw(fmt.Errorf("unexpected error initializing full block index: %w", err))
   185  	}
   186  
   187  	e.ComponentManager.Start(parent)
   188  }
   189  
   190  // initializeLastFullBlockHeightIndex initializes the index of full blocks
   191  // (blocks for which we have ingested all collections) to the root block height.
   192  // This means that the Access Node will ingest all collections for all blocks
   193  // ingested after state bootstrapping is complete (all blocks received from the network).
   194  // If the index has already been initialized, this is a no-op.
   195  // No errors are expected during normal operation.
   196  func (e *Engine) initLastFullBlockHeightIndex() error {
   197  	rootBlock, err := e.state.Params().FinalizedRoot()
   198  	if err != nil {
   199  		return fmt.Errorf("failed to get root block: %w", err)
   200  	}
   201  
   202  	// insert is a noop if the index has already been initialized and no error is returned
   203  	err = e.blocks.InsertLastFullBlockHeightIfNotExists(rootBlock.Height)
   204  	if err != nil {
   205  		return fmt.Errorf("failed to update last full block height during ingestion engine startup: %w", err)
   206  	}
   207  
   208  	lastFullHeight, err := e.blocks.GetLastFullBlockHeight()
   209  	if err != nil {
   210  		return fmt.Errorf("failed to get last full block height during ingestion engine startup: %w", err)
   211  	}
   212  
   213  	e.collectionExecutedMetric.UpdateLastFullBlockHeight(lastFullHeight)
   214  
   215  	return nil
   216  }
   217  
   218  func (e *Engine) processBackground(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   219  	// context with timeout
   220  	requestCtx, cancel := context.WithTimeout(ctx, defaultCollectionCatchupTimeout)
   221  	defer cancel()
   222  
   223  	// request missing collections
   224  	err := e.requestMissingCollections(requestCtx)
   225  	if err != nil {
   226  		e.log.Error().Err(err).Msg("requesting missing collections failed")
   227  	}
   228  	ready()
   229  
   230  	updateTicker := time.NewTicker(defaultFullBlockRefreshInterval)
   231  	defer updateTicker.Stop()
   232  
   233  	requestTicker := time.NewTicker(defaultMissingCollsRequestInterval)
   234  	defer requestTicker.Stop()
   235  
   236  	for {
   237  		select {
   238  		case <-ctx.Done():
   239  			return
   240  
   241  		// refresh the LastFullBlockReceived index
   242  		case <-updateTicker.C:
   243  			err := e.updateLastFullBlockReceivedIndex()
   244  			if err != nil {
   245  				ctx.Throw(err)
   246  			}
   247  
   248  		// request missing collections from the network
   249  		case <-requestTicker.C:
   250  			err := e.checkMissingCollections()
   251  			if err != nil {
   252  				ctx.Throw(err)
   253  			}
   254  		}
   255  	}
   256  }
   257  
   258  func (e *Engine) processExecutionReceipts(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   259  	ready()
   260  	notifier := e.executionReceiptsNotifier.Channel()
   261  
   262  	for {
   263  		select {
   264  		case <-ctx.Done():
   265  			return
   266  		case <-notifier:
   267  			err := e.processAvailableExecutionReceipts(ctx)
   268  			if err != nil {
   269  				// if an error reaches this point, it is unexpected
   270  				ctx.Throw(err)
   271  				return
   272  			}
   273  		}
   274  	}
   275  }
   276  
   277  func (e *Engine) processAvailableExecutionReceipts(ctx context.Context) error {
   278  	for {
   279  		select {
   280  		case <-ctx.Done():
   281  			return nil
   282  		default:
   283  		}
   284  		msg, ok := e.executionReceiptsQueue.Get()
   285  		if !ok {
   286  			return nil
   287  		}
   288  
   289  		receipt := msg.Payload.(*flow.ExecutionReceipt)
   290  
   291  		if err := e.handleExecutionReceipt(msg.OriginID, receipt); err != nil {
   292  			return err
   293  		}
   294  	}
   295  
   296  }
   297  
   298  func (e *Engine) processFinalizedBlocks(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   299  	ready()
   300  	notifier := e.finalizedBlockNotifier.Channel()
   301  
   302  	for {
   303  		select {
   304  		case <-ctx.Done():
   305  			return
   306  		case <-notifier:
   307  			_ = e.processAvailableFinalizedBlocks(ctx)
   308  		}
   309  	}
   310  }
   311  
   312  func (e *Engine) processAvailableFinalizedBlocks(ctx context.Context) error {
   313  	for {
   314  		select {
   315  		case <-ctx.Done():
   316  			return nil
   317  		default:
   318  		}
   319  
   320  		msg, ok := e.finalizedBlockQueue.Get()
   321  		if !ok {
   322  			return nil
   323  		}
   324  
   325  		hb := msg.Payload.(*model.Block)
   326  		blockID := hb.BlockID
   327  
   328  		if err := e.processFinalizedBlock(blockID); err != nil {
   329  			e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to process block")
   330  			continue
   331  		}
   332  	}
   333  }
   334  
   335  // process processes the given ingestion engine event. Events that are given
   336  // to this function originate within the expulsion engine on the node with the
   337  // given origin ID.
   338  func (e *Engine) process(originID flow.Identifier, event interface{}) error {
   339  	select {
   340  	case <-e.ComponentManager.ShutdownSignal():
   341  		return component.ErrComponentShutdown
   342  	default:
   343  	}
   344  
   345  	switch event.(type) {
   346  	case *flow.ExecutionReceipt:
   347  		err := e.messageHandler.Process(originID, event)
   348  		e.executionReceiptsNotifier.Notify()
   349  		return err
   350  	case *model.Block:
   351  		err := e.messageHandler.Process(originID, event)
   352  		e.finalizedBlockNotifier.Notify()
   353  		return err
   354  	default:
   355  		return fmt.Errorf("invalid event type (%T)", event)
   356  	}
   357  }
   358  
   359  // SubmitLocal submits an event originating on the local node.
   360  func (e *Engine) SubmitLocal(event interface{}) {
   361  	err := e.process(e.me.NodeID(), event)
   362  	if err != nil {
   363  		engine.LogError(e.log, err)
   364  	}
   365  }
   366  
   367  // Submit submits the given event from the node with the given origin ID
   368  // for processing in a non-blocking manner. It returns instantly and logs
   369  // a potential processing error internally when done.
   370  func (e *Engine) Submit(_ channels.Channel, originID flow.Identifier, event interface{}) {
   371  	err := e.process(originID, event)
   372  	if err != nil {
   373  		engine.LogError(e.log, err)
   374  	}
   375  }
   376  
   377  // ProcessLocal processes an event originating on the local node.
   378  func (e *Engine) ProcessLocal(event interface{}) error {
   379  	return e.process(e.me.NodeID(), event)
   380  }
   381  
   382  // Process processes the given event from the node with the given origin ID in
   383  // a blocking manner. It returns the potential processing error when done.
   384  func (e *Engine) Process(_ channels.Channel, originID flow.Identifier, event interface{}) error {
   385  	return e.process(originID, event)
   386  }
   387  
   388  // OnFinalizedBlock is called by the follower engine after a block has been finalized and the state has been updated
   389  func (e *Engine) OnFinalizedBlock(hb *model.Block) {
   390  	_ = e.ProcessLocal(hb)
   391  }
   392  
   393  // processBlock handles an incoming finalized block.
   394  func (e *Engine) processFinalizedBlock(blockID flow.Identifier) error {
   395  
   396  	// TODO: consider using storage.Index.ByBlockID, the index contains collection id and seals ID
   397  	block, err := e.blocks.ByID(blockID)
   398  	if err != nil {
   399  		return fmt.Errorf("failed to lookup block: %w", err)
   400  	}
   401  
   402  	// FIX: we can't index guarantees here, as we might have more than one block
   403  	// with the same collection as long as it is not finalized
   404  
   405  	// TODO: substitute an indexer module as layer between engine and storage
   406  
   407  	// index the block storage with each of the collection guarantee
   408  	err = e.blocks.IndexBlockForCollections(block.Header.ID(), flow.GetIDs(block.Payload.Guarantees))
   409  	if err != nil {
   410  		return fmt.Errorf("could not index block for collections: %w", err)
   411  	}
   412  
   413  	// loop through seals and index ID -> result ID
   414  	for _, seal := range block.Payload.Seals {
   415  		err := e.executionResults.Index(seal.BlockID, seal.ResultID)
   416  		if err != nil {
   417  			return fmt.Errorf("could not index block for execution result: %w", err)
   418  		}
   419  	}
   420  
   421  	// skip requesting collections, if this block is below the last full block height
   422  	// this means that either we have already received these collections, or the block
   423  	// may contain unverifiable guarantees (in case this node has just joined the network)
   424  	lastFullBlockHeight, err := e.blocks.GetLastFullBlockHeight()
   425  	if err != nil {
   426  		return fmt.Errorf("could not get last full block height: %w", err)
   427  	}
   428  
   429  	if block.Header.Height <= lastFullBlockHeight {
   430  		e.log.Info().Msgf("skipping requesting collections for finalized block below last full block height (%d<=%d)", block.Header.Height, lastFullBlockHeight)
   431  		return nil
   432  	}
   433  
   434  	// queue requesting each of the collections from the collection node
   435  	e.requestCollectionsInFinalizedBlock(block.Payload.Guarantees)
   436  
   437  	e.collectionExecutedMetric.BlockFinalized(block)
   438  
   439  	return nil
   440  }
   441  
   442  func (e *Engine) handleExecutionReceipt(_ flow.Identifier, r *flow.ExecutionReceipt) error {
   443  	// persist the execution receipt locally, storing will also index the receipt
   444  	err := e.executionReceipts.Store(r)
   445  	if err != nil {
   446  		return fmt.Errorf("failed to store execution receipt: %w", err)
   447  	}
   448  
   449  	e.collectionExecutedMetric.ExecutionReceiptReceived(r)
   450  	return nil
   451  }
   452  
   453  // OnCollection handles the response of the a collection request made earlier when a block was received.
   454  // No errors expected during normal operations.
   455  func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) {
   456  	collection, ok := entity.(*flow.Collection)
   457  	if !ok {
   458  		e.log.Error().Msgf("invalid entity type (%T)", entity)
   459  		return
   460  	}
   461  
   462  	err := indexer.HandleCollection(collection, e.collections, e.transactions, e.log, e.collectionExecutedMetric)
   463  	if err != nil {
   464  		e.log.Error().Err(err).Msg("could not handle collection")
   465  		return
   466  	}
   467  }
   468  
   469  // requestMissingCollections requests missing collections for all blocks in the local db storage once at startup
   470  func (e *Engine) requestMissingCollections(ctx context.Context) error {
   471  
   472  	var startHeight, endHeight uint64
   473  
   474  	// get the height of the last block for which all collections were received
   475  	lastFullHeight, err := e.blocks.GetLastFullBlockHeight()
   476  	if err != nil {
   477  		return fmt.Errorf("failed to complete requests for missing collections: %w", err)
   478  	}
   479  
   480  	// start from the next block
   481  	startHeight = lastFullHeight + 1
   482  
   483  	// end at the finalized block
   484  	finalBlk, err := e.state.Final().Head()
   485  	if err != nil {
   486  		return err
   487  	}
   488  	endHeight = finalBlk.Height
   489  
   490  	e.log.Info().
   491  		Uint64("start_height", startHeight).
   492  		Uint64("end_height", endHeight).
   493  		Msg("starting collection catchup")
   494  
   495  	// collect all missing collection ids in a map
   496  	var missingCollMap = make(map[flow.Identifier]struct{})
   497  
   498  	// iterate through the complete chain and request the missing collections
   499  	for i := startHeight; i <= endHeight; i++ {
   500  
   501  		// if deadline exceeded or someone cancelled the context
   502  		if ctx.Err() != nil {
   503  			return fmt.Errorf("failed to complete requests for missing collections: %w", ctx.Err())
   504  		}
   505  
   506  		missingColls, err := e.missingCollectionsAtHeight(i)
   507  		if err != nil {
   508  			return fmt.Errorf("failed to retrieve missing collections by height %d during collection catchup: %w", i, err)
   509  		}
   510  
   511  		// request the missing collections
   512  		e.requestCollectionsInFinalizedBlock(missingColls)
   513  
   514  		// add them to the missing collection id map to track later
   515  		for _, cg := range missingColls {
   516  			missingCollMap[cg.CollectionID] = struct{}{}
   517  		}
   518  	}
   519  
   520  	// if no collections were found to be missing we are done.
   521  	if len(missingCollMap) == 0 {
   522  		// nothing more to do
   523  		e.log.Info().Msg("no missing collections found")
   524  		return nil
   525  	}
   526  
   527  	// the collection catchup needs to happen ASAP when the node starts up. Hence, force the requester to dispatch all request
   528  	e.request.Force()
   529  
   530  	// track progress of retrieving all the missing collections by polling the db periodically
   531  	ticker := time.NewTicker(defaultCollectionCatchupDBPollInterval)
   532  	defer ticker.Stop()
   533  
   534  	// while there are still missing collections, keep polling
   535  	for len(missingCollMap) > 0 {
   536  		select {
   537  		case <-ctx.Done():
   538  			// context may have expired
   539  			return fmt.Errorf("failed to complete collection retreival: %w", ctx.Err())
   540  		case <-ticker.C:
   541  
   542  			// log progress
   543  			e.log.Info().
   544  				Int("total_missing_collections", len(missingCollMap)).
   545  				Msg("retrieving missing collections...")
   546  
   547  			var foundColls []flow.Identifier
   548  			// query db to find if collections are still missing
   549  			for collID := range missingCollMap {
   550  				found, err := e.haveCollection(collID)
   551  				if err != nil {
   552  					return err
   553  				}
   554  				// if collection found in local db, remove it from missingColls later
   555  				if found {
   556  					foundColls = append(foundColls, collID)
   557  				}
   558  			}
   559  
   560  			// update the missingColls list by removing collections that have now been received
   561  			for _, c := range foundColls {
   562  				delete(missingCollMap, c)
   563  			}
   564  		}
   565  	}
   566  
   567  	e.log.Info().Msg("collection catchup done")
   568  	return nil
   569  }
   570  
   571  // updateLastFullBlockReceivedIndex finds the next highest height where all previous collections
   572  // have been indexed, and updates the LastFullBlockReceived index to that height
   573  func (e *Engine) updateLastFullBlockReceivedIndex() error {
   574  	lastFullHeight, err := e.blocks.GetLastFullBlockHeight()
   575  	if err != nil {
   576  		return fmt.Errorf("failed to get last full block height: %w", err)
   577  	}
   578  
   579  	finalBlk, err := e.state.Final().Head()
   580  	if err != nil {
   581  		return fmt.Errorf("failed to get finalized block: %w", err)
   582  	}
   583  	finalizedHeight := finalBlk.Height
   584  
   585  	// track the latest contiguous full height
   586  	newLastFullHeight, err := e.lowestHeightWithMissingCollection(lastFullHeight, finalizedHeight)
   587  	if err != nil {
   588  		return fmt.Errorf("failed to find last full block received height: %w", err)
   589  	}
   590  
   591  	// if more contiguous blocks are now complete, update db
   592  	if newLastFullHeight > lastFullHeight {
   593  		err = e.blocks.UpdateLastFullBlockHeight(newLastFullHeight)
   594  		if err != nil {
   595  			return fmt.Errorf("failed to update last full block height")
   596  		}
   597  
   598  		e.collectionExecutedMetric.UpdateLastFullBlockHeight(newLastFullHeight)
   599  
   600  		e.log.Debug().
   601  			Uint64("last_full_block_height", newLastFullHeight).
   602  			Msg("updated LastFullBlockReceived index")
   603  	}
   604  
   605  	return nil
   606  }
   607  
   608  // lowestHeightWithMissingCollection returns the lowest height that is missing collections
   609  func (e *Engine) lowestHeightWithMissingCollection(lastFullHeight, finalizedHeight uint64) (uint64, error) {
   610  	newLastFullHeight := lastFullHeight
   611  
   612  	for i := lastFullHeight + 1; i <= finalizedHeight; i++ {
   613  		missingColls, err := e.missingCollectionsAtHeight(i)
   614  		if err != nil {
   615  			return 0, err
   616  		}
   617  
   618  		// return when we find the first block with missing collections
   619  		if len(missingColls) > 0 {
   620  			return newLastFullHeight, nil
   621  		}
   622  
   623  		newLastFullHeight = i
   624  	}
   625  
   626  	return newLastFullHeight, nil
   627  }
   628  
   629  // checkMissingCollections requests missing collections if the number of blocks missing collections
   630  // have reached the defaultMissingCollsForBlkThreshold value.
   631  func (e *Engine) checkMissingCollections() error {
   632  	lastFullHeight, err := e.blocks.GetLastFullBlockHeight()
   633  	if err != nil {
   634  		return err
   635  	}
   636  
   637  	finalBlk, err := e.state.Final().Head()
   638  	if err != nil {
   639  		return fmt.Errorf("failed to get finalized block: %w", err)
   640  	}
   641  	finalizedHeight := finalBlk.Height
   642  
   643  	// number of blocks with missing collections
   644  	incompleteBlksCnt := 0
   645  
   646  	// collect all missing collections
   647  	var allMissingColls []*flow.CollectionGuarantee
   648  
   649  	// start from the next block till we either hit the finalized block or cross the max collection missing threshold
   650  	for i := lastFullHeight + 1; i <= finalizedHeight && incompleteBlksCnt < defaultMissingCollsForBlkThreshold; i++ {
   651  		missingColls, err := e.missingCollectionsAtHeight(i)
   652  		if err != nil {
   653  			return fmt.Errorf("failed to find missing collections at height %d: %w", i, err)
   654  		}
   655  
   656  		if len(missingColls) == 0 {
   657  			continue
   658  		}
   659  
   660  		incompleteBlksCnt++
   661  
   662  		allMissingColls = append(allMissingColls, missingColls...)
   663  	}
   664  
   665  	// additionally, if more than threshold blocks have missing collections OR collections are
   666  	// missing since defaultMissingCollsForAgeThreshold, re-request those collections
   667  	if incompleteBlksCnt >= defaultMissingCollsForBlkThreshold ||
   668  		(finalizedHeight-lastFullHeight) > defaultMissingCollsForAgeThreshold {
   669  		// warn log since this should generally not happen
   670  		e.log.Warn().
   671  			Uint64("finalized_height", finalizedHeight).
   672  			Uint64("last_full_blk_height", lastFullHeight).
   673  			Int("missing_collection_blk_count", incompleteBlksCnt).
   674  			Int("missing_collection_count", len(allMissingColls)).
   675  			Msg("re-requesting missing collections")
   676  		e.requestCollectionsInFinalizedBlock(allMissingColls)
   677  	}
   678  
   679  	return nil
   680  }
   681  
   682  // missingCollectionsAtHeight returns all missing collection guarantees at a given height
   683  func (e *Engine) missingCollectionsAtHeight(h uint64) ([]*flow.CollectionGuarantee, error) {
   684  	block, err := e.blocks.ByHeight(h)
   685  	if err != nil {
   686  		return nil, fmt.Errorf("failed to retrieve block by height %d: %w", h, err)
   687  	}
   688  
   689  	var missingColls []*flow.CollectionGuarantee
   690  	for _, guarantee := range block.Payload.Guarantees {
   691  		collID := guarantee.CollectionID
   692  		found, err := e.haveCollection(collID)
   693  		if err != nil {
   694  			return nil, err
   695  		}
   696  		if !found {
   697  			missingColls = append(missingColls, guarantee)
   698  		}
   699  	}
   700  	return missingColls, nil
   701  }
   702  
   703  // haveCollection looks up the collection from the collection db with collID
   704  func (e *Engine) haveCollection(collID flow.Identifier) (bool, error) {
   705  	_, err := e.collections.LightByID(collID)
   706  	if err == nil {
   707  		return true, nil
   708  	}
   709  	if errors.Is(err, storage.ErrNotFound) {
   710  		return false, nil
   711  	}
   712  	return false, fmt.Errorf("failed to retrieve collection %s: %w", collID.String(), err)
   713  }
   714  
   715  // requestCollectionsInFinalizedBlock registers collection requests with the requester engine
   716  func (e *Engine) requestCollectionsInFinalizedBlock(missingColls []*flow.CollectionGuarantee) {
   717  	for _, cg := range missingColls {
   718  		guarantors, err := protocol.FindGuarantors(e.state, cg)
   719  		if err != nil {
   720  			// failed to find guarantors for guarantees contained in a finalized block is fatal error
   721  			e.log.Fatal().Err(err).Msgf("could not find guarantors for guarantee %v", cg.ID())
   722  		}
   723  		e.request.EntityByID(cg.ID(), filter.HasNodeID(guarantors...))
   724  	}
   725  }