github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/state_synchronization/indexer/indexer_core.go (about) 1 package indexer 2 3 import ( 4 "errors" 5 "fmt" 6 "time" 7 8 "github.com/rs/zerolog" 9 "golang.org/x/sync/errgroup" 10 11 "github.com/onflow/flow-go/fvm/storage/derived" 12 "github.com/onflow/flow-go/ledger" 13 "github.com/onflow/flow-go/ledger/common/convert" 14 "github.com/onflow/flow-go/model/flow" 15 "github.com/onflow/flow-go/module" 16 "github.com/onflow/flow-go/module/executiondatasync/execution_data" 17 "github.com/onflow/flow-go/storage" 18 bstorage "github.com/onflow/flow-go/storage/badger" 19 "github.com/onflow/flow-go/utils/logging" 20 ) 21 22 // IndexerCore indexes the execution state. 23 type IndexerCore struct { 24 log zerolog.Logger 25 metrics module.ExecutionStateIndexerMetrics 26 27 registers storage.RegisterIndex 28 headers storage.Headers 29 events storage.Events 30 collections storage.Collections 31 transactions storage.Transactions 32 results storage.LightTransactionResults 33 batcher bstorage.BatchBuilder 34 35 collectionExecutedMetric module.CollectionExecutedMetric 36 37 derivedChainData *derived.DerivedChainData 38 serviceAddress flow.Address 39 } 40 41 // New execution state indexer used to ingest block execution data and index it by height. 42 // The passed RegisterIndex storage must be populated to include the first and last height otherwise the indexer 43 // won't be initialized to ensure we have bootstrapped the storage first. 44 func New( 45 log zerolog.Logger, 46 metrics module.ExecutionStateIndexerMetrics, 47 batcher bstorage.BatchBuilder, 48 registers storage.RegisterIndex, 49 headers storage.Headers, 50 events storage.Events, 51 collections storage.Collections, 52 transactions storage.Transactions, 53 results storage.LightTransactionResults, 54 chain flow.Chain, 55 derivedChainData *derived.DerivedChainData, 56 collectionExecutedMetric module.CollectionExecutedMetric, 57 ) (*IndexerCore, error) { 58 log = log.With().Str("component", "execution_indexer").Logger() 59 metrics.InitializeLatestHeight(registers.LatestHeight()) 60 61 log.Info(). 62 Uint64("first_height", registers.FirstHeight()). 63 Uint64("latest_height", registers.LatestHeight()). 64 Msg("indexer initialized") 65 66 return &IndexerCore{ 67 log: log, 68 metrics: metrics, 69 batcher: batcher, 70 registers: registers, 71 headers: headers, 72 collections: collections, 73 transactions: transactions, 74 events: events, 75 results: results, 76 serviceAddress: chain.ServiceAddress(), 77 derivedChainData: derivedChainData, 78 79 collectionExecutedMetric: collectionExecutedMetric, 80 }, nil 81 } 82 83 // RegisterValue retrieves register values by the register IDs at the provided block height. 84 // Even if the register wasn't indexed at the provided height, returns the highest height the register was indexed at. 85 // If a register is not found it will return a nil value and not an error. 86 // Expected errors: 87 // - storage.ErrHeightNotIndexed if the given height was not indexed yet or lower than the first indexed height. 88 func (c *IndexerCore) RegisterValue(ID flow.RegisterID, height uint64) (flow.RegisterValue, error) { 89 value, err := c.registers.Get(ID, height) 90 if err != nil { 91 // only return an error if the error doesn't match the not found error, since we have 92 // to gracefully handle not found values and instead assign nil, that is because the script executor 93 // expects that behaviour 94 if errors.Is(err, storage.ErrNotFound) { 95 return nil, nil 96 } 97 98 return nil, err 99 } 100 101 return value, nil 102 } 103 104 // IndexBlockData indexes all execution block data by height. 105 // This method shouldn't be used concurrently. 106 // Expected errors: 107 // - storage.ErrNotFound if the block for execution data was not found 108 func (c *IndexerCore) IndexBlockData(data *execution_data.BlockExecutionDataEntity) error { 109 header, err := c.headers.ByBlockID(data.BlockID) 110 if err != nil { 111 return fmt.Errorf("could not get the block by ID %s: %w", data.BlockID, err) 112 } 113 114 lg := c.log.With(). 115 Hex("block_id", logging.ID(data.BlockID)). 116 Uint64("height", header.Height). 117 Logger() 118 119 lg.Debug().Msgf("indexing new block") 120 121 // the height we are indexing must be exactly one bigger or same as the latest height indexed from the storage 122 latest := c.registers.LatestHeight() 123 if header.Height != latest+1 && header.Height != latest { 124 return fmt.Errorf("must index block data with the next height %d, but got %d", latest+1, header.Height) 125 } 126 127 // allow rerunning the indexer for same height since we are fetching height from register storage, but there are other storages 128 // for indexing resources which might fail to update the values, so this enables rerunning and reindexing those resources 129 if header.Height == latest { 130 lg.Warn().Msg("reindexing block data") 131 c.metrics.BlockReindexed() 132 } 133 134 start := time.Now() 135 g := errgroup.Group{} 136 137 var eventCount, resultCount, registerCount int 138 g.Go(func() error { 139 start := time.Now() 140 141 events := make([]flow.Event, 0) 142 results := make([]flow.LightTransactionResult, 0) 143 for _, chunk := range data.ChunkExecutionDatas { 144 events = append(events, chunk.Events...) 145 results = append(results, chunk.TransactionResults...) 146 } 147 148 batch := bstorage.NewBatch(c.batcher) 149 150 err := c.events.BatchStore(data.BlockID, []flow.EventsList{events}, batch) 151 if err != nil { 152 return fmt.Errorf("could not index events at height %d: %w", header.Height, err) 153 } 154 155 err = c.results.BatchStore(data.BlockID, results, batch) 156 if err != nil { 157 return fmt.Errorf("could not index transaction results at height %d: %w", header.Height, err) 158 } 159 160 batch.Flush() 161 if err != nil { 162 return fmt.Errorf("batch flush error: %w", err) 163 } 164 165 eventCount = len(events) 166 resultCount = len(results) 167 168 lg.Debug(). 169 Int("event_count", eventCount). 170 Int("result_count", resultCount). 171 Dur("duration_ms", time.Since(start)). 172 Msg("indexed badger data") 173 174 return nil 175 }) 176 177 g.Go(func() error { 178 start := time.Now() 179 180 // index all collections except the system chunk 181 // Note: the access ingestion engine also indexes collections, starting when the block is 182 // finalized. This process can fall behind due to the node being offline, resource issues 183 // or network congestion. This indexer ensures that collections are never farther behind 184 // than the latest indexed block. Calling the collection handler with a collection that 185 // has already been indexed is a noop. 186 indexedCount := 0 187 if len(data.ChunkExecutionDatas) > 0 { 188 for _, chunk := range data.ChunkExecutionDatas[0 : len(data.ChunkExecutionDatas)-1] { 189 err := HandleCollection(chunk.Collection, c.collections, c.transactions, c.log, c.collectionExecutedMetric) 190 if err != nil { 191 return fmt.Errorf("could not handle collection") 192 } 193 indexedCount++ 194 } 195 } 196 197 lg.Debug(). 198 Int("collection_count", indexedCount). 199 Dur("duration_ms", time.Since(start)). 200 Msg("indexed collections") 201 202 return nil 203 }) 204 205 g.Go(func() error { 206 start := time.Now() 207 208 // we are iterating all the registers and overwrite any existing register at the same path 209 // this will make sure if we have multiple register changes only the last change will get persisted 210 // if block has two chucks: 211 // first chunk updates: { X: 1, Y: 2 } 212 // second chunk updates: { X: 2 } 213 // then we should persist only {X: 2: Y: 2} 214 payloads := make(map[ledger.Path]*ledger.Payload) 215 events := make([]flow.Event, 0) 216 collections := make([]*flow.Collection, 0) 217 for _, chunk := range data.ChunkExecutionDatas { 218 events = append(events, chunk.Events...) 219 collections = append(collections, chunk.Collection) 220 update := chunk.TrieUpdate 221 if update != nil { 222 // this should never happen but we check anyway 223 if len(update.Paths) != len(update.Payloads) { 224 return fmt.Errorf("update paths length is %d and payloads length is %d and they don't match", len(update.Paths), len(update.Payloads)) 225 } 226 227 for i, path := range update.Paths { 228 payloads[path] = update.Payloads[i] 229 } 230 } 231 } 232 233 err = c.indexRegisters(payloads, header.Height) 234 if err != nil { 235 return fmt.Errorf("could not index register payloads at height %d: %w", header.Height, err) 236 } 237 238 err = c.updateProgramCache(header, events, collections) 239 if err != nil { 240 return fmt.Errorf("could not update program cache at height %d: %w", header.Height, err) 241 } 242 243 registerCount = len(payloads) 244 245 lg.Debug(). 246 Int("register_count", registerCount). 247 Dur("duration_ms", time.Since(start)). 248 Msg("indexed registers") 249 250 return nil 251 }) 252 253 err = g.Wait() 254 if err != nil { 255 return fmt.Errorf("failed to index block data at height %d: %w", header.Height, err) 256 } 257 258 c.metrics.BlockIndexed(header.Height, time.Since(start), eventCount, registerCount, resultCount) 259 lg.Debug(). 260 Dur("duration_ms", time.Since(start)). 261 Msg("indexed block data") 262 263 return nil 264 } 265 266 func (c *IndexerCore) updateProgramCache(header *flow.Header, events []flow.Event, collections []*flow.Collection) error { 267 if c.derivedChainData == nil { 268 return nil 269 } 270 271 derivedBlockData := c.derivedChainData.GetOrCreateDerivedBlockData( 272 header.ID(), 273 header.ParentID, 274 ) 275 276 // get a list of all contracts that were updated in this block 277 updatedContracts, err := findContractUpdates(events) 278 if err != nil { 279 return fmt.Errorf("could not find contract updates for block %d: %w", header.Height, err) 280 } 281 282 // invalidate cache entries for all modified programs 283 tx, err := derivedBlockData.NewDerivedTransactionData(0, 0) 284 if err != nil { 285 return fmt.Errorf("could not create derived transaction data for block %d: %w", header.Height, err) 286 } 287 288 tx.AddInvalidator(&accessInvalidator{ 289 programs: &programInvalidator{ 290 invalidated: updatedContracts, 291 }, 292 meterParamOverrides: &meterParamOverridesInvalidator{ 293 invalidateAll: hasAuthorizedTransaction(collections, c.serviceAddress), 294 }, 295 }) 296 297 err = tx.Commit() 298 if err != nil { 299 return fmt.Errorf("could not commit derived transaction data for block %d: %w", header.Height, err) 300 } 301 302 return nil 303 } 304 305 func (c *IndexerCore) indexRegisters(registers map[ledger.Path]*ledger.Payload, height uint64) error { 306 regEntries := make(flow.RegisterEntries, 0, len(registers)) 307 308 for _, payload := range registers { 309 k, err := payload.Key() 310 if err != nil { 311 return err 312 } 313 314 id, err := convert.LedgerKeyToRegisterID(k) 315 if err != nil { 316 return err 317 } 318 319 regEntries = append(regEntries, flow.RegisterEntry{ 320 Key: id, 321 Value: payload.Value(), 322 }) 323 } 324 325 return c.registers.Store(regEntries, height) 326 } 327 328 // HandleCollection handles the response of the a collection request made earlier when a block was received. 329 // No errors expected during normal operations. 330 func HandleCollection( 331 collection *flow.Collection, 332 collections storage.Collections, 333 transactions storage.Transactions, 334 logger zerolog.Logger, 335 collectionExecutedMetric module.CollectionExecutedMetric, 336 ) error { 337 338 light := collection.Light() 339 340 collectionExecutedMetric.CollectionFinalized(light) 341 collectionExecutedMetric.CollectionExecuted(light) 342 343 // FIX: we can't index guarantees here, as we might have more than one block 344 // with the same collection as long as it is not finalized 345 346 // store the light collection (collection minus the transaction body - those are stored separately) 347 // and add transaction ids as index 348 err := collections.StoreLightAndIndexByTransaction(&light) 349 if err != nil { 350 // ignore collection if already seen 351 if errors.Is(err, storage.ErrAlreadyExists) { 352 logger.Debug(). 353 Hex("collection_id", logging.Entity(light)). 354 Msg("collection is already seen") 355 return nil 356 } 357 return err 358 } 359 360 // now store each of the transaction body 361 for _, tx := range collection.Transactions { 362 err := transactions.Store(tx) 363 if err != nil { 364 return fmt.Errorf("could not store transaction (%x): %w", tx.ID(), err) 365 } 366 } 367 368 return nil 369 }