github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/state_synchronization/indexer/indexer.go (about)

     1  package indexer
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/module"
    12  	"github.com/onflow/flow-go/module/component"
    13  	"github.com/onflow/flow-go/module/executiondatasync/execution_data"
    14  	"github.com/onflow/flow-go/module/executiondatasync/execution_data/cache"
    15  	"github.com/onflow/flow-go/module/irrecoverable"
    16  	"github.com/onflow/flow-go/module/jobqueue"
    17  	"github.com/onflow/flow-go/module/state_synchronization"
    18  	"github.com/onflow/flow-go/module/state_synchronization/requester/jobs"
    19  	"github.com/onflow/flow-go/storage"
    20  )
    21  
    22  const (
    23  	workersCount = 1 // how many workers will concurrently process the tasks in the jobqueue
    24  	searchAhead  = 1 // how many block heights ahead of the current will be requested and tasked for jobqueue
    25  
    26  	// fetchTimeout is the timeout for retrieving execution data from the datastore
    27  	// This is required by the execution data reader, but in practice, this isn't needed
    28  	// here since the data is in a local db.
    29  	fetchTimeout = 30 * time.Second
    30  )
    31  
    32  // ErrIndexNotInitialized is returned when the indexer is not initialized
    33  //
    34  // This generally indicates that the index databases are still being initialized, and trying again
    35  // later may succeed
    36  var ErrIndexNotInitialized = errors.New("index not initialized")
    37  
    38  var _ state_synchronization.IndexReporter = (*Indexer)(nil)
    39  
    40  // Indexer handles ingestion of new execution data available and uses the execution data indexer module
    41  // to index the data.
    42  // The processing of new available data is done by creating a jobqueue that uses the execution data reader to
    43  // obtain new jobs. The worker also implements the `highestConsecutiveHeight` method which is used by the execution
    44  // data reader, so it doesn't surpass the highest sealed block height when fetching the data.
    45  // The execution state worker has a callback that is used by the upstream queues which download new execution data to
    46  // notify new data is available and kick off indexing.
    47  type Indexer struct {
    48  	component.Component
    49  	log             zerolog.Logger
    50  	exeDataReader   *jobs.ExecutionDataReader
    51  	exeDataNotifier engine.Notifier
    52  	indexer         *IndexerCore
    53  	jobConsumer     *jobqueue.ComponentConsumer
    54  	registers       storage.RegisterIndex
    55  }
    56  
    57  // NewIndexer creates a new execution worker.
    58  func NewIndexer(
    59  	log zerolog.Logger,
    60  	initHeight uint64,
    61  	registers storage.RegisterIndex,
    62  	indexer *IndexerCore,
    63  	executionCache *cache.ExecutionDataCache,
    64  	executionDataLatestHeight func() (uint64, error),
    65  	processedHeight storage.ConsumerProgress,
    66  ) (*Indexer, error) {
    67  	r := &Indexer{
    68  		log:             log.With().Str("module", "execution_indexer").Logger(),
    69  		exeDataNotifier: engine.NewNotifier(),
    70  		indexer:         indexer,
    71  		registers:       registers,
    72  	}
    73  
    74  	r.exeDataReader = jobs.NewExecutionDataReader(executionCache, fetchTimeout, executionDataLatestHeight)
    75  
    76  	// create a jobqueue that will process new available block execution data. The `exeDataNotifier` is used to
    77  	// signal new work, which is being triggered on the `OnExecutionData` handler.
    78  	jobConsumer, err := jobqueue.NewComponentConsumer(
    79  		r.log,
    80  		r.exeDataNotifier.Channel(),
    81  		processedHeight,
    82  		r.exeDataReader,
    83  		initHeight,
    84  		r.processExecutionData,
    85  		workersCount,
    86  		searchAhead,
    87  	)
    88  	if err != nil {
    89  		return nil, fmt.Errorf("error creating execution data jobqueue: %w", err)
    90  	}
    91  
    92  	r.jobConsumer = jobConsumer
    93  
    94  	r.Component = r.jobConsumer
    95  
    96  	return r, nil
    97  }
    98  
    99  // Start the worker jobqueue to consume the available data.
   100  func (i *Indexer) Start(ctx irrecoverable.SignalerContext) {
   101  	i.exeDataReader.AddContext(ctx)
   102  	i.Component.Start(ctx)
   103  }
   104  
   105  // LowestIndexedHeight returns the lowest height indexed by the execution indexer.
   106  func (i *Indexer) LowestIndexedHeight() (uint64, error) {
   107  	// TODO: use a separate value to track the lowest indexed height. We're using the registers db's
   108  	// value here to start because it's convenient. When pruning support is added, this will need to
   109  	// be updated.
   110  	return i.registers.FirstHeight(), nil
   111  }
   112  
   113  // HighestIndexedHeight returns the highest height indexed by the execution indexer.
   114  func (i *Indexer) HighestIndexedHeight() (uint64, error) {
   115  	select {
   116  	case <-i.jobConsumer.Ready():
   117  	default:
   118  		// LastProcessedIndex is not meaningful until the component has completed startup
   119  		return 0, fmt.Errorf("HighestIndexedHeight must not be called before the component is ready")
   120  	}
   121  
   122  	// The jobqueue maintains its own highest indexed height value, separate from the register db.
   123  	// Since jobs are only marked complete when ALL data is indexed, the lastProcessedIndex must
   124  	// be strictly less than or equal to the register db's LatestHeight.
   125  	return i.jobConsumer.LastProcessedIndex(), nil
   126  }
   127  
   128  // OnExecutionData is used to notify when new execution data is downloaded by the execution data requester jobqueue.
   129  func (i *Indexer) OnExecutionData(_ *execution_data.BlockExecutionDataEntity) {
   130  	i.exeDataNotifier.Notify()
   131  }
   132  
   133  // processExecutionData is a worker method that is being called by the jobqueue when processing a new job.
   134  // The job data contains execution data which we provide to the execution indexer to index it.
   135  func (i *Indexer) processExecutionData(ctx irrecoverable.SignalerContext, job module.Job, done func()) {
   136  	entry, err := jobs.JobToBlockEntry(job)
   137  	if err != nil {
   138  		i.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error converting execution data job")
   139  		ctx.Throw(err)
   140  	}
   141  
   142  	err = i.indexer.IndexBlockData(entry.ExecutionData)
   143  	if err != nil {
   144  		i.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error during execution data index processing job")
   145  		ctx.Throw(err)
   146  	}
   147  
   148  	done()
   149  }