github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/state_synchronization/indexer/indexer_core.go

     1  package indexer
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  	"golang.org/x/sync/errgroup"
    10  
    11  	"github.com/onflow/flow-go/fvm/storage/derived"
    12  	"github.com/onflow/flow-go/ledger"
    13  	"github.com/onflow/flow-go/ledger/common/convert"
    14  	"github.com/onflow/flow-go/model/flow"
    15  	"github.com/onflow/flow-go/module"
    16  	"github.com/onflow/flow-go/module/executiondatasync/execution_data"
    17  	"github.com/onflow/flow-go/storage"
    18  	bstorage "github.com/onflow/flow-go/storage/badger"
    19  	"github.com/onflow/flow-go/utils/logging"
    20  )
    21  
    22  // IndexerCore indexes the execution state.
    23  type IndexerCore struct {
    24  	log     zerolog.Logger
    25  	metrics module.ExecutionStateIndexerMetrics
    26  
    27  	registers    storage.RegisterIndex
    28  	headers      storage.Headers
    29  	events       storage.Events
    30  	collections  storage.Collections
    31  	transactions storage.Transactions
    32  	results      storage.LightTransactionResults
    33  	batcher      bstorage.BatchBuilder
    34  
    35  	collectionExecutedMetric module.CollectionExecutedMetric
    36  
    37  	derivedChainData *derived.DerivedChainData
    38  	serviceAddress   flow.Address
    39  }
    40  
     41  // New creates an execution state indexer used to ingest block execution data and index it by height.
     42  // The passed RegisterIndex storage must already be populated with the first and latest indexed heights;
     43  // otherwise the indexer is not initialized, which ensures the storage has been bootstrapped first.
    44  func New(
    45  	log zerolog.Logger,
    46  	metrics module.ExecutionStateIndexerMetrics,
    47  	batcher bstorage.BatchBuilder,
    48  	registers storage.RegisterIndex,
    49  	headers storage.Headers,
    50  	events storage.Events,
    51  	collections storage.Collections,
    52  	transactions storage.Transactions,
    53  	results storage.LightTransactionResults,
    54  	chain flow.Chain,
    55  	derivedChainData *derived.DerivedChainData,
    56  	collectionExecutedMetric module.CollectionExecutedMetric,
    57  ) (*IndexerCore, error) {
    58  	log = log.With().Str("component", "execution_indexer").Logger()
    59  	metrics.InitializeLatestHeight(registers.LatestHeight())
    60  
    61  	log.Info().
    62  		Uint64("first_height", registers.FirstHeight()).
    63  		Uint64("latest_height", registers.LatestHeight()).
    64  		Msg("indexer initialized")
    65  
    66  	return &IndexerCore{
    67  		log:              log,
    68  		metrics:          metrics,
    69  		batcher:          batcher,
    70  		registers:        registers,
    71  		headers:          headers,
    72  		collections:      collections,
    73  		transactions:     transactions,
    74  		events:           events,
    75  		results:          results,
    76  		serviceAddress:   chain.ServiceAddress(),
    77  		derivedChainData: derivedChainData,
    78  
    79  		collectionExecutedMetric: collectionExecutedMetric,
    80  	}, nil
    81  }
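
// A minimal wiring sketch (hypothetical, not part of the original file): the register storage
// handed to New must already be bootstrapped with its first and latest indexed heights, and the
// remaining dependencies (log, indexerMetrics, db, and the various storages) are assumed to be
// provided by the node builder. The error check is shown even though New currently never fails.
//
//	core, err := indexer.New(log, indexerMetrics, db, registers, headers, events,
//		collections, transactions, results, chain, derivedChainData, collectionExecutedMetric)
//	if err != nil {
//		return fmt.Errorf("could not create execution state indexer: %w", err)
//	}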
    82  
     83  // RegisterValue retrieves the register value for the given register ID at the provided block height.
     84  // If the register was not updated at the provided height, the value from the highest indexed height at or below it is returned.
     85  // If a register is not found, a nil value and no error are returned.
     86  // Expected errors:
     87  // - storage.ErrHeightNotIndexed if the given height was not indexed yet or is lower than the first indexed height.
    88  func (c *IndexerCore) RegisterValue(ID flow.RegisterID, height uint64) (flow.RegisterValue, error) {
    89  	value, err := c.registers.Get(ID, height)
    90  	if err != nil {
     91  		// a not-found error is handled gracefully by returning a nil value instead of an error,
     92  		// because that is the behaviour the script executor expects; any other error is
     93  		// returned to the caller
    94  		if errors.Is(err, storage.ErrNotFound) {
    95  			return nil, nil
    96  		}
    97  
    98  		return nil, err
    99  	}
   100  
   101  	return value, nil
   102  }
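
// readRegister is a hypothetical helper (not part of the original file) showing how a caller of
// RegisterValue distinguishes an unset register (nil value, nil error) from a real failure such as
// storage.ErrHeightNotIndexed. It returns an extra boolean indicating whether the register exists.
func readRegister(c *IndexerCore, id flow.RegisterID, height uint64) (flow.RegisterValue, bool, error) {
	value, err := c.RegisterValue(id, height)
	if err != nil {
		// e.g. storage.ErrHeightNotIndexed when the height is outside the indexed range
		return nil, false, fmt.Errorf("could not read register %s at height %d: %w", id.String(), height, err)
	}
	if value == nil {
		// the register has never been set at or below this height
		return nil, false, nil
	}
	return value, true, nil
}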
   103  
   104  // IndexBlockData indexes all execution block data by height.
   105  // This method shouldn't be used concurrently.
   106  // Expected errors:
   107  // - storage.ErrNotFound if the block for execution data was not found
   108  func (c *IndexerCore) IndexBlockData(data *execution_data.BlockExecutionDataEntity) error {
   109  	header, err := c.headers.ByBlockID(data.BlockID)
   110  	if err != nil {
   111  		return fmt.Errorf("could not get the block by ID %s: %w", data.BlockID, err)
   112  	}
   113  
   114  	lg := c.log.With().
   115  		Hex("block_id", logging.ID(data.BlockID)).
   116  		Uint64("height", header.Height).
   117  		Logger()
   118  
    119  	lg.Debug().Msg("indexing new block")
   120  
    121  	// the height we are indexing must be either the same as or exactly one greater than the latest height indexed in the register storage
   122  	latest := c.registers.LatestHeight()
   123  	if header.Height != latest+1 && header.Height != latest {
   124  		return fmt.Errorf("must index block data with the next height %d, but got %d", latest+1, header.Height)
   125  	}
   126  
    127  	// rerunning the indexer for the same height is allowed because the latest height is read from the register storage, while the other storages
    128  	// used for indexing resources might have failed to update their values; rerunning allows those resources to be reindexed
   129  	if header.Height == latest {
   130  		lg.Warn().Msg("reindexing block data")
   131  		c.metrics.BlockReindexed()
   132  	}
   133  
   134  	start := time.Now()
   135  	g := errgroup.Group{}
   136  
   137  	var eventCount, resultCount, registerCount int
   138  	g.Go(func() error {
   139  		start := time.Now()
   140  
   141  		events := make([]flow.Event, 0)
   142  		results := make([]flow.LightTransactionResult, 0)
   143  		for _, chunk := range data.ChunkExecutionDatas {
   144  			events = append(events, chunk.Events...)
   145  			results = append(results, chunk.TransactionResults...)
   146  		}
   147  
   148  		batch := bstorage.NewBatch(c.batcher)
   149  
   150  		err := c.events.BatchStore(data.BlockID, []flow.EventsList{events}, batch)
   151  		if err != nil {
   152  			return fmt.Errorf("could not index events at height %d: %w", header.Height, err)
   153  		}
   154  
   155  		err = c.results.BatchStore(data.BlockID, results, batch)
   156  		if err != nil {
   157  			return fmt.Errorf("could not index transaction results at height %d: %w", header.Height, err)
   158  		}
   159  
    160  		err = batch.Flush()
   161  		if err != nil {
   162  			return fmt.Errorf("batch flush error: %w", err)
   163  		}
   164  
   165  		eventCount = len(events)
   166  		resultCount = len(results)
   167  
   168  		lg.Debug().
   169  			Int("event_count", eventCount).
   170  			Int("result_count", resultCount).
   171  			Dur("duration_ms", time.Since(start)).
   172  			Msg("indexed badger data")
   173  
   174  		return nil
   175  	})
   176  
   177  	g.Go(func() error {
   178  		start := time.Now()
   179  
   180  		// index all collections except the system chunk
   181  		// Note: the access ingestion engine also indexes collections, starting when the block is
   182  		// finalized. This process can fall behind due to the node being offline, resource issues
   183  		// or network congestion. This indexer ensures that collections are never farther behind
   184  		// than the latest indexed block. Calling the collection handler with a collection that
   185  		// has already been indexed is a noop.
   186  		indexedCount := 0
   187  		if len(data.ChunkExecutionDatas) > 0 {
   188  			for _, chunk := range data.ChunkExecutionDatas[0 : len(data.ChunkExecutionDatas)-1] {
   189  				err := HandleCollection(chunk.Collection, c.collections, c.transactions, c.log, c.collectionExecutedMetric)
   190  				if err != nil {
    191  					return fmt.Errorf("could not handle collection: %w", err)
   192  				}
   193  				indexedCount++
   194  			}
   195  		}
   196  
   197  		lg.Debug().
   198  			Int("collection_count", indexedCount).
   199  			Dur("duration_ms", time.Since(start)).
   200  			Msg("indexed collections")
   201  
   202  		return nil
   203  	})
   204  
   205  	g.Go(func() error {
   206  		start := time.Now()
   207  
    208  		// iterate over all the registers, overwriting any existing register at the same path,
    209  		// which makes sure that when a register changes multiple times only the last change gets persisted.
    210  		// for example, if a block has two chunks:
    211  		// first chunk updates: { X: 1, Y: 2 }
    212  		// second chunk updates: { X: 2 }
    213  		// then we should persist only { X: 2, Y: 2 }
   214  		payloads := make(map[ledger.Path]*ledger.Payload)
   215  		events := make([]flow.Event, 0)
   216  		collections := make([]*flow.Collection, 0)
   217  		for _, chunk := range data.ChunkExecutionDatas {
   218  			events = append(events, chunk.Events...)
   219  			collections = append(collections, chunk.Collection)
   220  			update := chunk.TrieUpdate
   221  			if update != nil {
   222  				// this should never happen but we check anyway
   223  				if len(update.Paths) != len(update.Payloads) {
    224  					return fmt.Errorf("mismatched trie update: got %d paths and %d payloads", len(update.Paths), len(update.Payloads))
   225  				}
   226  
   227  				for i, path := range update.Paths {
   228  					payloads[path] = update.Payloads[i]
   229  				}
   230  			}
   231  		}
   232  
    233  		err := c.indexRegisters(payloads, header.Height)
   234  		if err != nil {
   235  			return fmt.Errorf("could not index register payloads at height %d: %w", header.Height, err)
   236  		}
   237  
   238  		err = c.updateProgramCache(header, events, collections)
   239  		if err != nil {
   240  			return fmt.Errorf("could not update program cache at height %d: %w", header.Height, err)
   241  		}
   242  
   243  		registerCount = len(payloads)
   244  
   245  		lg.Debug().
   246  			Int("register_count", registerCount).
   247  			Dur("duration_ms", time.Since(start)).
   248  			Msg("indexed registers")
   249  
   250  		return nil
   251  	})
   252  
   253  	err = g.Wait()
   254  	if err != nil {
   255  		return fmt.Errorf("failed to index block data at height %d: %w", header.Height, err)
   256  	}
   257  
   258  	c.metrics.BlockIndexed(header.Height, time.Since(start), eventCount, registerCount, resultCount)
   259  	lg.Debug().
   260  		Dur("duration_ms", time.Since(start)).
   261  		Msg("indexed block data")
   262  
   263  	return nil
   264  }
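
// indexBlockDataSequence is a hypothetical driver (not part of the original file) illustrating the
// sequential-height contract of IndexBlockData: execution data entities are fed one at a time in
// ascending height order, each exactly one above the latest indexed height; re-submitting the
// latest height only triggers a reindex of that block.
func indexBlockDataSequence(c *IndexerCore, entities []*execution_data.BlockExecutionDataEntity) error {
	for _, entity := range entities {
		if err := c.IndexBlockData(entity); err != nil {
			return fmt.Errorf("could not index execution data for block %s: %w", entity.BlockID, err)
		}
	}
	return nil
}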
   265  
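// updateProgramCache invalidates entries in the derived data cache for the given block:
// cached programs are invalidated for every contract updated by the block's events, and meter
// parameter overrides are invalidated if any collection contains a transaction authorized by
// the service account. It is a no-op when no derived chain data is configured.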
   266  func (c *IndexerCore) updateProgramCache(header *flow.Header, events []flow.Event, collections []*flow.Collection) error {
   267  	if c.derivedChainData == nil {
   268  		return nil
   269  	}
   270  
   271  	derivedBlockData := c.derivedChainData.GetOrCreateDerivedBlockData(
   272  		header.ID(),
   273  		header.ParentID,
   274  	)
   275  
   276  	// get a list of all contracts that were updated in this block
   277  	updatedContracts, err := findContractUpdates(events)
   278  	if err != nil {
   279  		return fmt.Errorf("could not find contract updates for block %d: %w", header.Height, err)
   280  	}
   281  
   282  	// invalidate cache entries for all modified programs
   283  	tx, err := derivedBlockData.NewDerivedTransactionData(0, 0)
   284  	if err != nil {
   285  		return fmt.Errorf("could not create derived transaction data for block %d: %w", header.Height, err)
   286  	}
   287  
   288  	tx.AddInvalidator(&accessInvalidator{
   289  		programs: &programInvalidator{
   290  			invalidated: updatedContracts,
   291  		},
   292  		meterParamOverrides: &meterParamOverridesInvalidator{
   293  			invalidateAll: hasAuthorizedTransaction(collections, c.serviceAddress),
   294  		},
   295  	})
   296  
   297  	err = tx.Commit()
   298  	if err != nil {
   299  		return fmt.Errorf("could not commit derived transaction data for block %d: %w", header.Height, err)
   300  	}
   301  
   302  	return nil
   303  }
   304  
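// indexRegisters converts the given ledger payloads into register entries and stores them in the
// register index at the given height.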
   305  func (c *IndexerCore) indexRegisters(registers map[ledger.Path]*ledger.Payload, height uint64) error {
   306  	regEntries := make(flow.RegisterEntries, 0, len(registers))
   307  
   308  	for _, payload := range registers {
   309  		k, err := payload.Key()
   310  		if err != nil {
   311  			return err
   312  		}
   313  
   314  		id, err := convert.LedgerKeyToRegisterID(k)
   315  		if err != nil {
   316  			return err
   317  		}
   318  
   319  		regEntries = append(regEntries, flow.RegisterEntry{
   320  			Key:   id,
   321  			Value: payload.Value(),
   322  		})
   323  	}
   324  
   325  	return c.registers.Store(regEntries, height)
   326  }
   327  
    328  // HandleCollection handles the response to a collection request made earlier when a block was received.
   329  // No errors expected during normal operations.
   330  func HandleCollection(
   331  	collection *flow.Collection,
   332  	collections storage.Collections,
   333  	transactions storage.Transactions,
   334  	logger zerolog.Logger,
   335  	collectionExecutedMetric module.CollectionExecutedMetric,
   336  ) error {
   337  
   338  	light := collection.Light()
   339  
   340  	collectionExecutedMetric.CollectionFinalized(light)
   341  	collectionExecutedMetric.CollectionExecuted(light)
   342  
   343  	// FIX: we can't index guarantees here, as we might have more than one block
   344  	// with the same collection as long as it is not finalized
   345  
    346  	// store the light collection (the collection minus the transaction bodies, which are stored separately)
   347  	// and add transaction ids as index
   348  	err := collections.StoreLightAndIndexByTransaction(&light)
   349  	if err != nil {
   350  		// ignore collection if already seen
   351  		if errors.Is(err, storage.ErrAlreadyExists) {
   352  			logger.Debug().
   353  				Hex("collection_id", logging.Entity(light)).
   354  				Msg("collection is already seen")
   355  			return nil
   356  		}
   357  		return err
   358  	}
   359  
    360  	// now store each of the transaction bodies
   361  	for _, tx := range collection.Transactions {
   362  		err := transactions.Store(tx)
   363  		if err != nil {
   364  			return fmt.Errorf("could not store transaction (%x): %w", tx.ID(), err)
   365  		}
   366  	}
   367  
   368  	return nil
   369  }