github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/state_synchronization/indexer/indexer.go (about) 1 package indexer 2 3 import ( 4 "errors" 5 "fmt" 6 "time" 7 8 "github.com/rs/zerolog" 9 10 "github.com/onflow/flow-go/engine" 11 "github.com/onflow/flow-go/module" 12 "github.com/onflow/flow-go/module/component" 13 "github.com/onflow/flow-go/module/executiondatasync/execution_data" 14 "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" 15 "github.com/onflow/flow-go/module/irrecoverable" 16 "github.com/onflow/flow-go/module/jobqueue" 17 "github.com/onflow/flow-go/module/state_synchronization" 18 "github.com/onflow/flow-go/module/state_synchronization/requester/jobs" 19 "github.com/onflow/flow-go/storage" 20 ) 21 22 const ( 23 workersCount = 1 // how many workers will concurrently process the tasks in the jobqueue 24 searchAhead = 1 // how many block heights ahead of the current will be requested and tasked for jobqueue 25 26 // fetchTimeout is the timeout for retrieving execution data from the datastore 27 // This is required by the execution data reader, but in practice, this isn't needed 28 // here since the data is in a local db. 29 fetchTimeout = 30 * time.Second 30 ) 31 32 // ErrIndexNotInitialized is returned when the indexer is not initialized 33 // 34 // This generally indicates that the index databases are still being initialized, and trying again 35 // later may succeed 36 var ErrIndexNotInitialized = errors.New("index not initialized") 37 38 var _ state_synchronization.IndexReporter = (*Indexer)(nil) 39 40 // Indexer handles ingestion of new execution data available and uses the execution data indexer module 41 // to index the data. 42 // The processing of new available data is done by creating a jobqueue that uses the execution data reader to 43 // obtain new jobs. The worker also implements the `highestConsecutiveHeight` method which is used by the execution 44 // data reader, so it doesn't surpass the highest sealed block height when fetching the data. 45 // The execution state worker has a callback that is used by the upstream queues which download new execution data to 46 // notify new data is available and kick off indexing. 47 type Indexer struct { 48 component.Component 49 log zerolog.Logger 50 exeDataReader *jobs.ExecutionDataReader 51 exeDataNotifier engine.Notifier 52 indexer *IndexerCore 53 jobConsumer *jobqueue.ComponentConsumer 54 registers storage.RegisterIndex 55 } 56 57 // NewIndexer creates a new execution worker. 58 func NewIndexer( 59 log zerolog.Logger, 60 initHeight uint64, 61 registers storage.RegisterIndex, 62 indexer *IndexerCore, 63 executionCache *cache.ExecutionDataCache, 64 executionDataLatestHeight func() (uint64, error), 65 processedHeight storage.ConsumerProgress, 66 ) (*Indexer, error) { 67 r := &Indexer{ 68 log: log.With().Str("module", "execution_indexer").Logger(), 69 exeDataNotifier: engine.NewNotifier(), 70 indexer: indexer, 71 registers: registers, 72 } 73 74 r.exeDataReader = jobs.NewExecutionDataReader(executionCache, fetchTimeout, executionDataLatestHeight) 75 76 // create a jobqueue that will process new available block execution data. The `exeDataNotifier` is used to 77 // signal new work, which is being triggered on the `OnExecutionData` handler. 78 jobConsumer, err := jobqueue.NewComponentConsumer( 79 r.log, 80 r.exeDataNotifier.Channel(), 81 processedHeight, 82 r.exeDataReader, 83 initHeight, 84 r.processExecutionData, 85 workersCount, 86 searchAhead, 87 ) 88 if err != nil { 89 return nil, fmt.Errorf("error creating execution data jobqueue: %w", err) 90 } 91 92 r.jobConsumer = jobConsumer 93 94 r.Component = r.jobConsumer 95 96 return r, nil 97 } 98 99 // Start the worker jobqueue to consume the available data. 100 func (i *Indexer) Start(ctx irrecoverable.SignalerContext) { 101 i.exeDataReader.AddContext(ctx) 102 i.Component.Start(ctx) 103 } 104 105 // LowestIndexedHeight returns the lowest height indexed by the execution indexer. 106 func (i *Indexer) LowestIndexedHeight() (uint64, error) { 107 // TODO: use a separate value to track the lowest indexed height. We're using the registers db's 108 // value here to start because it's convenient. When pruning support is added, this will need to 109 // be updated. 110 return i.registers.FirstHeight(), nil 111 } 112 113 // HighestIndexedHeight returns the highest height indexed by the execution indexer. 114 func (i *Indexer) HighestIndexedHeight() (uint64, error) { 115 select { 116 case <-i.jobConsumer.Ready(): 117 default: 118 // LastProcessedIndex is not meaningful until the component has completed startup 119 return 0, fmt.Errorf("HighestIndexedHeight must not be called before the component is ready") 120 } 121 122 // The jobqueue maintains its own highest indexed height value, separate from the register db. 123 // Since jobs are only marked complete when ALL data is indexed, the lastProcessedIndex must 124 // be strictly less than or equal to the register db's LatestHeight. 125 return i.jobConsumer.LastProcessedIndex(), nil 126 } 127 128 // OnExecutionData is used to notify when new execution data is downloaded by the execution data requester jobqueue. 129 func (i *Indexer) OnExecutionData(_ *execution_data.BlockExecutionDataEntity) { 130 i.exeDataNotifier.Notify() 131 } 132 133 // processExecutionData is a worker method that is being called by the jobqueue when processing a new job. 134 // The job data contains execution data which we provide to the execution indexer to index it. 135 func (i *Indexer) processExecutionData(ctx irrecoverable.SignalerContext, job module.Job, done func()) { 136 entry, err := jobs.JobToBlockEntry(job) 137 if err != nil { 138 i.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error converting execution data job") 139 ctx.Throw(err) 140 } 141 142 err = i.indexer.IndexBlockData(entry.ExecutionData) 143 if err != nil { 144 i.log.Error().Err(err).Str("job_id", string(job.ID())).Msg("error during execution data index processing job") 145 ctx.Throw(err) 146 } 147 148 done() 149 }