github.com/onflow/flow-go@v0.33.17/module/state_synchronization/indexer/indexer_core.go

package indexer

import (
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"
	"golang.org/x/sync/errgroup"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/common/convert"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/executiondatasync/execution_data"
	"github.com/onflow/flow-go/storage"
	bstorage "github.com/onflow/flow-go/storage/badger"
	"github.com/onflow/flow-go/utils/logging"
)

// IndexerCore indexes the execution state.
type IndexerCore struct {
	log     zerolog.Logger
	metrics module.ExecutionStateIndexerMetrics

	registers    storage.RegisterIndex
	headers      storage.Headers
	events       storage.Events
	collections  storage.Collections
	transactions storage.Transactions
	results      storage.LightTransactionResults
	batcher      bstorage.BatchBuilder

	collectionExecutedMetric module.CollectionExecutedMetric
}

// New creates a new execution state indexer used to ingest block execution data and index it by height.
// The passed RegisterIndex storage must already be populated with the first and latest indexed heights,
// otherwise the indexer won't initialize; this ensures the storage has been bootstrapped first.
func New(
	log zerolog.Logger,
	metrics module.ExecutionStateIndexerMetrics,
	batcher bstorage.BatchBuilder,
	registers storage.RegisterIndex,
	headers storage.Headers,
	events storage.Events,
	collections storage.Collections,
	transactions storage.Transactions,
	results storage.LightTransactionResults,
	collectionExecutedMetric module.CollectionExecutedMetric,
) (*IndexerCore, error) {
	log = log.With().Str("component", "execution_indexer").Logger()
	metrics.InitializeLatestHeight(registers.LatestHeight())

	log.Info().
		Uint64("first_height", registers.FirstHeight()).
		Uint64("latest_height", registers.LatestHeight()).
		Msg("indexer initialized")

	return &IndexerCore{
		log:                      log,
		metrics:                  metrics,
		batcher:                  batcher,
		registers:                registers,
		headers:                  headers,
		events:                   events,
		collections:              collections,
		transactions:             transactions,
		results:                  results,
		collectionExecutedMetric: collectionExecutedMetric,
	}, nil
}

// RegisterValue retrieves the register value for the given register ID at the provided block height.
// If the register wasn't updated at the provided height, the value from the highest height at which
// it was indexed (at or below the provided height) is returned.
// If a register is not found it will return a nil value and not an error.
// Expected errors:
// - storage.ErrHeightNotIndexed if the given height was not indexed yet or is lower than the first indexed height.
func (c *IndexerCore) RegisterValue(ID flow.RegisterID, height uint64) (flow.RegisterValue, error) {
	value, err := c.registers.Get(ID, height)
	if err != nil {
		// not-found values are handled gracefully by returning nil instead of an error,
		// because the script executor expects that behaviour; all other errors are returned as-is
		if errors.Is(err, storage.ErrNotFound) {
			return nil, nil
		}

		return nil, err
	}

	return value, nil
}
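
// exampleReadRegister is an illustrative sketch and not part of the upstream file: it shows how a
// caller is expected to treat RegisterValue's results, where a nil value without an error simply
// means the register was never written up to the requested height. The function name and the way
// the result is consumed are assumptions made for this example only.
func exampleReadRegister(core *IndexerCore, owner, key string, height uint64) (flow.RegisterValue, error) {
	id := flow.RegisterID{Owner: owner, Key: key}

	value, err := core.RegisterValue(id, height)
	if err != nil {
		// storage.ErrHeightNotIndexed: the requested height is outside the indexed range
		return nil, fmt.Errorf("register not available at height %d: %w", height, err)
	}
	if value == nil {
		// the register does not exist at this height; callers treat this as an empty value
		return nil, nil
	}

	return value, nil
}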

// IndexBlockData indexes all execution block data by height.
// This method shouldn't be used concurrently.
// Expected errors:
// - storage.ErrNotFound if the block for execution data was not found
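//
// An illustrative height-progression sketch (assuming the register storage currently reports
// LatestHeight() == 100; dataForHeight is a hypothetical helper used only in this comment):
//
//	IndexBlockData(dataForHeight(101)) // ok: the next height
//	IndexBlockData(dataForHeight(100)) // ok: reindexes the latest height (logged and counted in metrics)
//	IndexBlockData(dataForHeight(103)) // error: heights must be indexed contiguously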
func (c *IndexerCore) IndexBlockData(data *execution_data.BlockExecutionDataEntity) error {
	block, err := c.headers.ByBlockID(data.BlockID)
	if err != nil {
		return fmt.Errorf("could not get the block by ID %s: %w", data.BlockID, err)
	}

	lg := c.log.With().
		Hex("block_id", logging.ID(data.BlockID)).
		Uint64("height", block.Height).
		Logger()

	lg.Debug().Msgf("indexing new block")

	// the height being indexed must be either the next height after, or the same as, the latest height indexed in the register storage
	latest := c.registers.LatestHeight()
	if block.Height != latest+1 && block.Height != latest {
		return fmt.Errorf("must index block data with the next height %d, but got %d", latest+1, block.Height)
	}

	// rerunning the indexer for the same height is allowed: the latest height is read from the register storage,
	// but the other storages used for indexing resources might have failed to update their values,
	// so reindexing the same height lets those resources be repaired
	if block.Height == latest {
		lg.Warn().Msg("reindexing block data")
		c.metrics.BlockReindexed()
	}

	start := time.Now()
	g := errgroup.Group{}

	var eventCount, resultCount, registerCount int
	g.Go(func() error {
		start := time.Now()

		events := make([]flow.Event, 0)
		results := make([]flow.LightTransactionResult, 0)
		for _, chunk := range data.ChunkExecutionDatas {
			events = append(events, chunk.Events...)
			results = append(results, chunk.TransactionResults...)
		}

		batch := bstorage.NewBatch(c.batcher)

		err := c.events.BatchStore(data.BlockID, []flow.EventsList{events}, batch)
		if err != nil {
			return fmt.Errorf("could not index events at height %d: %w", block.Height, err)
		}

		err = c.results.BatchStore(data.BlockID, results, batch)
		if err != nil {
			return fmt.Errorf("could not index transaction results at height %d: %w", block.Height, err)
		}

		if err := batch.Flush(); err != nil {
			return fmt.Errorf("batch flush error: %w", err)
		}

		eventCount = len(events)
		resultCount = len(results)

		lg.Debug().
			Int("event_count", eventCount).
			Int("result_count", resultCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed badger data")

		return nil
	})

	g.Go(func() error {
		start := time.Now()

		// index all collections except the system chunk
		// Note: the access ingestion engine also indexes collections, starting when the block is
		// finalized. This process can fall behind due to the node being offline, resource issues
		// or network congestion. This indexer ensures that collections are never farther behind
		// than the latest indexed block. Calling the collection handler with a collection that
		// has already been indexed is a noop.
		indexedCount := 0
		if len(data.ChunkExecutionDatas) > 0 {
			for _, chunk := range data.ChunkExecutionDatas[0 : len(data.ChunkExecutionDatas)-1] {
				err := HandleCollection(chunk.Collection, c.collections, c.transactions, c.log, c.collectionExecutedMetric)
				if err != nil {
					return fmt.Errorf("could not handle collection: %w", err)
				}
				indexedCount++
			}
		}

		lg.Debug().
			Int("collection_count", indexedCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed collections")

		return nil
	})

	g.Go(func() error {
		start := time.Now()

		// iterate over all registers and overwrite any existing register at the same path,
		// so that when a register changes multiple times within a block only the last change is persisted.
		// e.g. if a block has two chunks:
		//   first chunk updates:  { X: 1, Y: 2 }
		//   second chunk updates: { X: 2 }
		// then only { X: 2, Y: 2 } should be persisted
		payloads := make(map[ledger.Path]*ledger.Payload)
		for _, chunk := range data.ChunkExecutionDatas {
			update := chunk.TrieUpdate
			if update != nil {
				// this should never happen but we check anyway
				if len(update.Paths) != len(update.Payloads) {
					return fmt.Errorf("update paths length is %d and payloads length is %d and they don't match", len(update.Paths), len(update.Payloads))
				}

				for i, path := range update.Paths {
					payloads[path] = update.Payloads[i]
				}
			}
		}

		err := c.indexRegisters(payloads, block.Height)
		if err != nil {
			return fmt.Errorf("could not index register payloads at height %d: %w", block.Height, err)
		}

		registerCount = len(payloads)

		lg.Debug().
			Int("register_count", registerCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed registers")

		return nil
	})

	err = g.Wait()
	if err != nil {
		return fmt.Errorf("failed to index block data at height %d: %w", block.Height, err)
	}

	c.metrics.BlockIndexed(block.Height, time.Since(start), eventCount, registerCount, resultCount)
	lg.Debug().
		Dur("duration_ms", time.Since(start)).
		Msg("indexed block data")

	return nil
}

func (c *IndexerCore) indexRegisters(registers map[ledger.Path]*ledger.Payload, height uint64) error {
	regEntries := make(flow.RegisterEntries, 0, len(registers))

	for _, payload := range registers {
		k, err := payload.Key()
		if err != nil {
			return err
		}

		id, err := convert.LedgerKeyToRegisterID(k)
		if err != nil {
			return err
		}

		regEntries = append(regEntries, flow.RegisterEntry{
			Key:   id,
			Value: payload.Value(),
		})
	}

	return c.registers.Store(regEntries, height)
}

// HandleCollection handles the response of a collection request made earlier when a block was received.
// No errors expected during normal operations.
func HandleCollection(
	collection *flow.Collection,
	collections storage.Collections,
	transactions storage.Transactions,
	logger zerolog.Logger,
	collectionExecutedMetric module.CollectionExecutedMetric,
) error {

	light := collection.Light()

	collectionExecutedMetric.CollectionFinalized(light)
	collectionExecutedMetric.CollectionExecuted(light)

	// FIX: we can't index guarantees here, as we might have more than one block
	// with the same collection as long as it is not finalized

	// store the light collection (collection minus the transaction bodies - those are stored separately)
	// and index it by the IDs of its transactions
	err := collections.StoreLightAndIndexByTransaction(&light)
	if err != nil {
		// ignore collection if already seen
		if errors.Is(err, storage.ErrAlreadyExists) {
			logger.Debug().
				Hex("collection_id", logging.Entity(light)).
				Msg("collection is already seen")
			return nil
		}
		return err
	}

	// now store each of the transaction bodies
	for _, tx := range collection.Transactions {
		err := transactions.Store(tx)
		if err != nil {
			return fmt.Errorf("could not store transaction (%x): %w", tx.ID(), err)
		}
	}

	return nil
}
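
// exampleIndexLoop is an illustrative sketch and not part of the upstream file: it shows the shape
// of the loop that is expected to drive IndexBlockData, feeding block execution data entities
// strictly in height order from whatever component downloads them. The nextData callback and the
// loop structure are assumptions made for this example only.
func exampleIndexLoop(core *IndexerCore, nextData func() (*execution_data.BlockExecutionDataEntity, bool)) error {
	for {
		data, ok := nextData()
		if !ok {
			return nil
		}

		// IndexBlockData is not safe for concurrent use, so entities are indexed one at a time,
		// each call either advancing the latest indexed height by one or reindexing the latest height.
		if err := core.IndexBlockData(data); err != nil {
			return fmt.Errorf("indexing failed for block %s: %w", data.BlockID, err)
		}
	}
}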