github.com/onflow/flow-go@v0.33.17/module/state_synchronization/indexer/indexer_core.go

package indexer

import (
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"
	"golang.org/x/sync/errgroup"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/common/convert"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/executiondatasync/execution_data"
	"github.com/onflow/flow-go/storage"
	bstorage "github.com/onflow/flow-go/storage/badger"
	"github.com/onflow/flow-go/utils/logging"
)

// IndexerCore indexes the execution state.
type IndexerCore struct {
	log     zerolog.Logger
	metrics module.ExecutionStateIndexerMetrics

	registers    storage.RegisterIndex
	headers      storage.Headers
	events       storage.Events
	collections  storage.Collections
	transactions storage.Transactions
	results      storage.LightTransactionResults
	batcher      bstorage.BatchBuilder

	collectionExecutedMetric module.CollectionExecutedMetric
}

// New creates an execution state indexer used to ingest block execution data and index it by height.
// The passed RegisterIndex storage must already be populated with the first and latest indexed heights;
// otherwise the indexer is not initialized, which ensures the storage has been bootstrapped first.
func New(
	log zerolog.Logger,
	metrics module.ExecutionStateIndexerMetrics,
	batcher bstorage.BatchBuilder,
	registers storage.RegisterIndex,
	headers storage.Headers,
	events storage.Events,
	collections storage.Collections,
	transactions storage.Transactions,
	results storage.LightTransactionResults,
	collectionExecutedMetric module.CollectionExecutedMetric,
) (*IndexerCore, error) {
	log = log.With().Str("component", "execution_indexer").Logger()
	metrics.InitializeLatestHeight(registers.LatestHeight())

	log.Info().
		Uint64("first_height", registers.FirstHeight()).
		Uint64("latest_height", registers.LatestHeight()).
		Msg("indexer initialized")

	return &IndexerCore{
		log:                      log,
		metrics:                  metrics,
		batcher:                  batcher,
		registers:                registers,
		headers:                  headers,
		events:                   events,
		collections:              collections,
		transactions:             transactions,
		results:                  results,
		collectionExecutedMetric: collectionExecutedMetric,
	}, nil
}
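
// exampleNewIndexerCore is an illustrative sketch and not part of the original file.
// It shows the wiring described in the doc comment on New: all dependencies are
// assumed to come from the node's existing storage, metrics, and badger setup, and
// the register storage is assumed to already be bootstrapped with a first and latest
// indexed height, since New reads and reports those heights immediately.
func exampleNewIndexerCore(
	log zerolog.Logger,
	metrics module.ExecutionStateIndexerMetrics,
	collectionMetric module.CollectionExecutedMetric,
	batcher bstorage.BatchBuilder,
	registers storage.RegisterIndex,
	headers storage.Headers,
	events storage.Events,
	collections storage.Collections,
	transactions storage.Transactions,
	results storage.LightTransactionResults,
) (*IndexerCore, error) {
	core, err := New(log, metrics, batcher, registers, headers, events, collections, transactions, results, collectionMetric)
	if err != nil {
		return nil, fmt.Errorf("could not create execution state indexer: %w", err)
	}
	return core, nil
}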

// RegisterValue retrieves the register value for the given register ID at the provided block height.
// If the register was not updated at the provided height, the value from the highest indexed height
// at or below the provided height is returned.
// If a register is not found it will return a nil value and not an error.
// Expected errors:
// - storage.ErrHeightNotIndexed if the given height was not indexed yet or is lower than the first indexed height.
func (c *IndexerCore) RegisterValue(ID flow.RegisterID, height uint64) (flow.RegisterValue, error) {
	value, err := c.registers.Get(ID, height)
	if err != nil {
		// only return an error if it is not the not-found error: not-found values are
		// handled gracefully by returning nil instead, because that is the behaviour
		// the script executor expects
		if errors.Is(err, storage.ErrNotFound) {
			return nil, nil
		}

		return nil, err
	}

	return value, nil
}
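
// exampleReadRegister is an illustrative sketch and not part of the original file.
// It shows how a caller (for example a script executor) would interpret the results of
// RegisterValue: a nil value with a nil error means the register was never indexed,
// while storage.ErrHeightNotIndexed means the requested height is outside the indexed
// range. The register ID and height used here are hypothetical.
func exampleReadRegister(core *IndexerCore) (flow.RegisterValue, error) {
	regID := flow.RegisterID{Owner: "", Key: "exampleKey"} // hypothetical register ID
	value, err := core.RegisterValue(regID, 100)           // hypothetical height
	if err != nil {
		if errors.Is(err, storage.ErrHeightNotIndexed) {
			// height 100 was not indexed yet, or is below the first indexed height
			return nil, err
		}
		return nil, fmt.Errorf("unexpected register lookup failure: %w", err)
	}
	if value == nil {
		// the register does not exist at this height; treat it as empty, not as an error
		return nil, nil
	}
	return value, nil
}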

// IndexBlockData indexes all execution block data by height.
// This method shouldn't be used concurrently.
// Expected errors:
// - storage.ErrNotFound if the block for execution data was not found
func (c *IndexerCore) IndexBlockData(data *execution_data.BlockExecutionDataEntity) error {
	block, err := c.headers.ByBlockID(data.BlockID)
	if err != nil {
		return fmt.Errorf("could not get the block by ID %s: %w", data.BlockID, err)
	}

	lg := c.log.With().
		Hex("block_id", logging.ID(data.BlockID)).
		Uint64("height", block.Height).
		Logger()

	lg.Debug().Msg("indexing new block")

	// the height being indexed must be either the same as, or exactly one greater than, the latest height indexed in the register storage
	latest := c.registers.LatestHeight()
	if block.Height != latest+1 && block.Height != latest {
		return fmt.Errorf("must index block data with the next height %d, but got %d", latest+1, block.Height)
	}

	// allow rerunning the indexer for the same height: the latest height is read from the register storage,
	// but the other storages indexed here might have failed to update their values, so rerunning
	// allows those resources to be reindexed
	if block.Height == latest {
		lg.Warn().Msg("reindexing block data")
		c.metrics.BlockReindexed()
	}

	start := time.Now()
	g := errgroup.Group{}

	var eventCount, resultCount, registerCount int
	g.Go(func() error {
		start := time.Now()

		events := make([]flow.Event, 0)
		results := make([]flow.LightTransactionResult, 0)
		for _, chunk := range data.ChunkExecutionDatas {
			events = append(events, chunk.Events...)
			results = append(results, chunk.TransactionResults...)
		}

		batch := bstorage.NewBatch(c.batcher)

		err := c.events.BatchStore(data.BlockID, []flow.EventsList{events}, batch)
		if err != nil {
			return fmt.Errorf("could not index events at height %d: %w", block.Height, err)
		}

		err = c.results.BatchStore(data.BlockID, results, batch)
		if err != nil {
			return fmt.Errorf("could not index transaction results at height %d: %w", block.Height, err)
		}

		err = batch.Flush()
		if err != nil {
			return fmt.Errorf("batch flush error: %w", err)
		}

		eventCount = len(events)
		resultCount = len(results)

		lg.Debug().
			Int("event_count", eventCount).
			Int("result_count", resultCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed badger data")

		return nil
	})

	g.Go(func() error {
		start := time.Now()

		// index all collections except the system chunk
		// Note: the access ingestion engine also indexes collections, starting when the block is
		// finalized. This process can fall behind due to the node being offline, resource issues
		// or network congestion. This indexer ensures that collections are never farther behind
		// than the latest indexed block. Calling the collection handler with a collection that
		// has already been indexed is a noop.
		indexedCount := 0
		if len(data.ChunkExecutionDatas) > 0 {
			for _, chunk := range data.ChunkExecutionDatas[0 : len(data.ChunkExecutionDatas)-1] {
				err := HandleCollection(chunk.Collection, c.collections, c.transactions, c.log, c.collectionExecutedMetric)
				if err != nil {
					return fmt.Errorf("could not handle collection: %w", err)
				}
				indexedCount++
			}
		}

		lg.Debug().
			Int("collection_count", indexedCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed collections")

		return nil
	})

	g.Go(func() error {
		start := time.Now()

		// iterate over all registers, overwriting any existing register at the same path, so that
		// when a register changes multiple times within a block only the last change is persisted.
		// For example, if a block has two chunks:
		//   first chunk updates:  { X: 1, Y: 2 }
		//   second chunk updates: { X: 2 }
		// then only { X: 2, Y: 2 } should be persisted
		payloads := make(map[ledger.Path]*ledger.Payload)
		for _, chunk := range data.ChunkExecutionDatas {
			update := chunk.TrieUpdate
			if update != nil {
				// this should never happen but we check anyway
				if len(update.Paths) != len(update.Payloads) {
					return fmt.Errorf("update paths length (%d) does not match payloads length (%d)", len(update.Paths), len(update.Payloads))
				}

				for i, path := range update.Paths {
					payloads[path] = update.Payloads[i]
				}
			}
		}

		err = c.indexRegisters(payloads, block.Height)
		if err != nil {
			return fmt.Errorf("could not index register payloads at height %d: %w", block.Height, err)
		}

		registerCount = len(payloads)

		lg.Debug().
			Int("register_count", registerCount).
			Dur("duration_ms", time.Since(start)).
			Msg("indexed registers")

		return nil
	})

	err = g.Wait()
	if err != nil {
		return fmt.Errorf("failed to index block data at height %d: %w", block.Height, err)
	}

	c.metrics.BlockIndexed(block.Height, time.Since(start), eventCount, registerCount, resultCount)
	lg.Debug().
		Dur("duration_ms", time.Since(start)).
		Msg("indexed block data")

	return nil
}
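
// exampleIndexNextBlock is an illustrative sketch and not part of the original file.
// It shows the ordering contract documented above: execution data must be handed to
// IndexBlockData in height order, either for the next height after the latest indexed
// register height or for that same height again (re-indexing). Obtaining the execution
// data itself is assumed to happen elsewhere, e.g. through the execution data requester.
func exampleIndexNextBlock(core *IndexerCore, data *execution_data.BlockExecutionDataEntity) error {
	err := core.IndexBlockData(data)
	if err != nil {
		if errors.Is(err, storage.ErrNotFound) {
			// the header for this execution data is not stored yet; the caller should
			// retry once the block header becomes available
			return err
		}
		// any other failure may be retried for the same height, since IndexBlockData
		// tolerates re-indexing the latest indexed height
		return fmt.Errorf("could not index block data: %w", err)
	}
	return nil
}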

// indexRegisters converts the given register payloads into register entries and stores
// them in the register index at the provided height.
func (c *IndexerCore) indexRegisters(registers map[ledger.Path]*ledger.Payload, height uint64) error {
	regEntries := make(flow.RegisterEntries, 0, len(registers))

	for _, payload := range registers {
		k, err := payload.Key()
		if err != nil {
			return err
		}

		id, err := convert.LedgerKeyToRegisterID(k)
		if err != nil {
			return err
		}

		regEntries = append(regEntries, flow.RegisterEntry{
			Key:   id,
			Value: payload.Value(),
		})
	}

	return c.registers.Store(regEntries, height)
}

// HandleCollection handles the response to a collection request made earlier when a block was received.
// No errors expected during normal operations.
func HandleCollection(
	collection *flow.Collection,
	collections storage.Collections,
	transactions storage.Transactions,
	logger zerolog.Logger,
	collectionExecutedMetric module.CollectionExecutedMetric,
) error {

	light := collection.Light()

	collectionExecutedMetric.CollectionFinalized(light)
	collectionExecutedMetric.CollectionExecuted(light)

	// FIX: we can't index guarantees here, as we might have more than one block
	// with the same collection as long as it is not finalized

	// store the light collection (the collection minus the transaction bodies, which are stored separately)
	// and add the transaction ids as an index
	err := collections.StoreLightAndIndexByTransaction(&light)
	if err != nil {
		// ignore collection if already seen
		if errors.Is(err, storage.ErrAlreadyExists) {
			logger.Debug().
				Hex("collection_id", logging.Entity(light)).
				Msg("collection is already seen")
			return nil
		}
		return err
	}

	// now store each of the transaction bodies
	for _, tx := range collection.Transactions {
		err := transactions.Store(tx)
		if err != nil {
			return fmt.Errorf("could not store transaction (%x): %w", tx.ID(), err)
		}
	}

	return nil
}
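
// exampleHandleCollections is an illustrative sketch and not part of the original file.
// It mirrors how IndexBlockData above uses HandleCollection: every chunk except the last
// (the system chunk) carries a user collection, and handling an already-indexed collection
// is a no-op, so it is safe to call this again for blocks that were only partially indexed.
func exampleHandleCollections(
	data *execution_data.BlockExecutionDataEntity,
	collections storage.Collections,
	transactions storage.Transactions,
	logger zerolog.Logger,
	metric module.CollectionExecutedMetric,
) error {
	if len(data.ChunkExecutionDatas) == 0 {
		return nil
	}
	for _, chunk := range data.ChunkExecutionDatas[:len(data.ChunkExecutionDatas)-1] {
		if err := HandleCollection(chunk.Collection, collections, transactions, logger, metric); err != nil {
			return fmt.Errorf("could not handle collection: %w", err)
		}
	}
	return nil
}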