github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/protocol_state.go (about)

     1  package badger
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/dgraph-io/badger/v2"
     7  
     8  	"github.com/onflow/flow-go/model/flow"
     9  	"github.com/onflow/flow-go/module"
    10  	"github.com/onflow/flow-go/module/irrecoverable"
    11  	"github.com/onflow/flow-go/module/metrics"
    12  	"github.com/onflow/flow-go/storage"
    13  	"github.com/onflow/flow-go/storage/badger/operation"
    14  	"github.com/onflow/flow-go/storage/badger/transaction"
    15  )
    16  
    17  // DefaultProtocolStateCacheSize is the default size for primary protocol state cache.
    18  // Minimally, we have 3 entries per epoch (one on epoch Switchover, one on receiving the Epoch Setup and one when seeing the Epoch Commit event).
    19  // Lets be generous and assume we have 20 different Protocol States per epoch.
    20  var DefaultProtocolStateCacheSize uint = 20
    21  
    22  // DefaultProtocolStateByBlockIDCacheSize is the default value for secondary byBlockIdCache.
    23  // We want to be able to cover a broad interval of views without cache misses, so we use a bigger value.
    24  var DefaultProtocolStateByBlockIDCacheSize uint = 1000
    25  
    26  // ProtocolState implements persistent storage for storing Protocol States.
    27  // Protocol state uses an embedded cache without storing capabilities(store happens on first retrieval) to avoid unnecessary
    28  // operations and to speed up access to frequently used Protocol State.
    29  type ProtocolState struct {
    30  	db *badger.DB
    31  
    32  	// cache is essentially an in-memory map from `ProtocolStateEntry.ID()` -> `RichProtocolStateEntry`
    33  	// We do _not_ populate this cache which holds the RichProtocolStateEntrys on store. This is because
    34  	//   (i) we don't have the RichProtocolStateEntry on store readily available and
    35  	//  (ii) new RichProtocolStateEntry are really rare throughout an epoch, so the total cost of populating
    36  	//       the cache becomes negligible over several views.
    37  	// In the future, we might want to populate the cache on store, if we want to maintain frequently-changing
    38  	// information in the protocol state, like the latest sealed block. This should be a smaller amount of work,
    39  	// because the `ProtocolStateEntry` is generated by `StateMutator.Build()`. The `StateMutator` should already
    40  	// have the needed Epoch Setup and Commit events, since it starts with a RichProtocolStateEntry for the parent
    41  	// state and consumes Epoch Setup and Epoch Commit events. Though, we leave this optimization for later.
    42  	//
    43  	// `cache` only holds the distinct Protocol States. On the happy path, we expect something like 3 entries per epoch.
    44  	// On the optimal happy path we have 3 entries per epoch: one entry on epoch Switchover, one on receiving the Epoch Setup
    45  	// and one when seeing the Epoch Commit event. Let's be generous and assume we have 20 different Protocol States per epoch.
    46  	// Beyond that, we are certainly leaving the domain of normal operations that we optimize for. Therefore, a cache size of
    47  	// roughly 100 is a reasonable balance between performance and memory consumption.
    48  	cache *Cache[flow.Identifier, *flow.RichProtocolStateEntry]
    49  
    50  	// byBlockIdCache is essentially an in-memory map from `Block.ID()` -> `ProtocolStateEntry.ID()`. The full
    51  	// Protocol state can be retrieved from the `cache` above.
    52  	// We populate the `byBlockIdCache` on store, because a new entry is added for every block and we probably also
    53  	// query the Protocol state for every block. So argument (ii) from above does not apply here. Furthermore,
    54  	// argument (i) from above also does not apply, because we already have the Protocol State's ID on store,
    55  	// so populating the cache is easy.
    56  	//
    57  	// `byBlockIdCache` will contain an entry for every block. We want to be able to cover a broad interval of views
    58  	// without cache misses, so a cache size of roughly 1000 entries is reasonable.
    59  	byBlockIdCache *Cache[flow.Identifier, flow.Identifier]
    60  }
    61  
    62  var _ storage.ProtocolState = (*ProtocolState)(nil)
    63  
    64  // NewProtocolState creates a ProtocolState instance, which is a database of Protocol State.
    65  // It supports storing, caching and retrieving by ID or the additionally indexed block ID.
    66  func NewProtocolState(collector module.CacheMetrics,
    67  	epochSetups storage.EpochSetups,
    68  	epochCommits storage.EpochCommits,
    69  	db *badger.DB,
    70  	stateCacheSize uint,
    71  	stateByBlockIDCacheSize uint,
    72  ) *ProtocolState {
    73  	retrieveByProtocolStateID := func(protocolStateID flow.Identifier) func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) {
    74  		var protocolStateEntry flow.ProtocolStateEntry
    75  		return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) {
    76  			err := operation.RetrieveProtocolState(protocolStateID, &protocolStateEntry)(tx)
    77  			if err != nil {
    78  				return nil, err
    79  			}
    80  			result, err := newRichProtocolStateEntry(&protocolStateEntry, epochSetups, epochCommits)
    81  			if err != nil {
    82  				return nil, fmt.Errorf("could not create rich protocol state entry: %w", err)
    83  			}
    84  			return result, nil
    85  		}
    86  	}
    87  
    88  	storeByBlockID := func(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error {
    89  		return func(tx *transaction.Tx) error {
    90  			err := transaction.WithTx(operation.IndexProtocolState(blockID, protocolStateID))(tx)
    91  			if err != nil {
    92  				return fmt.Errorf("could not index protocol state for block (%x): %w", blockID[:], err)
    93  			}
    94  			return nil
    95  		}
    96  	}
    97  
    98  	retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (flow.Identifier, error) {
    99  		return func(tx *badger.Txn) (flow.Identifier, error) {
   100  			var protocolStateID flow.Identifier
   101  			err := operation.LookupProtocolState(blockID, &protocolStateID)(tx)
   102  			if err != nil {
   103  				return flow.ZeroID, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err)
   104  			}
   105  			return protocolStateID, nil
   106  		}
   107  	}
   108  
   109  	return &ProtocolState{
   110  		db: db,
   111  		cache: newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolState,
   112  			withLimit[flow.Identifier, *flow.RichProtocolStateEntry](stateCacheSize),
   113  			withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]),
   114  			withRetrieve(retrieveByProtocolStateID)),
   115  		byBlockIdCache: newCache[flow.Identifier, flow.Identifier](collector, metrics.ResourceProtocolStateByBlockID,
   116  			withLimit[flow.Identifier, flow.Identifier](stateByBlockIDCacheSize),
   117  			withStore(storeByBlockID),
   118  			withRetrieve(retrieveByBlockID)),
   119  	}
   120  }
   121  
   122  // StoreTx returns an anonymous function (intended to be executed as part of a badger transaction),
   123  // which persists the given protocol state as part of a DB tx. Per convention, the identities in
   124  // the Protocol State must be in canonical order for the current and next epoch (if present),
   125  // otherwise an exception is returned.
   126  // Expected errors of the returned anonymous function:
   127  //   - storage.ErrAlreadyExists if a Protocol State with the given id is already stored
   128  func (s *ProtocolState) StoreTx(protocolStateID flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error {
   129  	// front-load sanity checks:
   130  	if !protocolState.CurrentEpoch.ActiveIdentities.Sorted(flow.IdentifierCanonical) {
   131  		return transaction.Fail(fmt.Errorf("sanity check failed: identities are not sorted"))
   132  	}
   133  	if protocolState.NextEpoch != nil && !protocolState.NextEpoch.ActiveIdentities.Sorted(flow.IdentifierCanonical) {
   134  		return transaction.Fail(fmt.Errorf("sanity check failed: next epoch identities are not sorted"))
   135  	}
   136  
   137  	// happy path: return anonymous function, whose future execution (as part of a transaction) will store the protocolState
   138  	return transaction.WithTx(operation.InsertProtocolState(protocolStateID, protocolState))
   139  }
   140  
   141  // Index returns an anonymous function that is intended to be executed as part of a database transaction.
   142  // In a nutshell, we want to maintain a map from `blockID` to `protocolStateID`, where `blockID` references the
   143  // block that _proposes_ the Protocol State.
   144  // Upon call, the anonymous function persists the specific map entry in the node's database.
   145  // Protocol convention:
   146  //   - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example,
   147  //     the protocol state changes if we seal some execution results emitting service events.
   148  //   - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value,
   149  //     the hash of the resulting protocol state at the end of processing B is to be used.
   150  //   - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block,
   151  //     _after_ validating the QC.
   152  //
   153  // Expected errors during normal operations:
   154  //   - storage.ErrAlreadyExists if a Protocol State for the given blockID has already been indexed
   155  func (s *ProtocolState) Index(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error {
   156  	return s.byBlockIdCache.PutTx(blockID, protocolStateID)
   157  }
   158  
   159  // ByID returns the protocol state by its ID.
   160  // Expected errors during normal operations:
   161  //   - storage.ErrNotFound if no protocol state with the given Identifier is known.
   162  func (s *ProtocolState) ByID(protocolStateID flow.Identifier) (*flow.RichProtocolStateEntry, error) {
   163  	tx := s.db.NewTransaction(false)
   164  	defer tx.Discard()
   165  	return s.cache.Get(protocolStateID)(tx)
   166  }
   167  
   168  // ByBlockID retrieves the Protocol State that the block with the given ID proposes.
   169  // CAUTION: this protocol state requires confirmation by a QC and will only become active at the child block,
   170  // _after_ validating the QC. Protocol convention:
   171  //   - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example,
   172  //     the protocol state changes if we seal some execution results emitting service events.
   173  //   - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value,
   174  //     the hash of the resulting protocol state at the end of processing B is to be used.
   175  //   - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block,
   176  //     _after_ validating the QC.
   177  //
   178  // Expected errors during normal operations:
   179  //   - storage.ErrNotFound if no protocol state has been indexed for the given block.
   180  func (s *ProtocolState) ByBlockID(blockID flow.Identifier) (*flow.RichProtocolStateEntry, error) {
   181  	tx := s.db.NewTransaction(false)
   182  	defer tx.Discard()
   183  	protocolStateID, err := s.byBlockIdCache.Get(blockID)(tx)
   184  	if err != nil {
   185  		return nil, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err)
   186  	}
   187  	return s.cache.Get(protocolStateID)(tx)
   188  }
   189  
   190  // newRichProtocolStateEntry constructs a rich protocol state entry from a protocol state entry.
   191  // It queries and fills in epoch setups and commits for previous and current epochs and possibly next epoch.
   192  // No errors are expected during normal operation.
   193  func newRichProtocolStateEntry(
   194  	protocolState *flow.ProtocolStateEntry,
   195  	setups storage.EpochSetups,
   196  	commits storage.EpochCommits,
   197  ) (*flow.RichProtocolStateEntry, error) {
   198  	var (
   199  		previousEpochSetup  *flow.EpochSetup
   200  		previousEpochCommit *flow.EpochCommit
   201  		nextEpochSetup      *flow.EpochSetup
   202  		nextEpochCommit     *flow.EpochCommit
   203  		err                 error
   204  	)
   205  	// query and fill in epoch setups and commits for previous and current epochs
   206  	if protocolState.PreviousEpoch != nil {
   207  		previousEpochSetup, err = setups.ByID(protocolState.PreviousEpoch.SetupID)
   208  		if err != nil {
   209  			return nil, fmt.Errorf("could not retrieve previous epoch setup: %w", err)
   210  		}
   211  		previousEpochCommit, err = commits.ByID(protocolState.PreviousEpoch.CommitID)
   212  		if err != nil {
   213  			return nil, fmt.Errorf("could not retrieve previous epoch commit: %w", err)
   214  		}
   215  	}
   216  
   217  	currentEpochSetup, err := setups.ByID(protocolState.CurrentEpoch.SetupID)
   218  	if err != nil {
   219  		return nil, fmt.Errorf("could not retrieve current epoch setup: %w", err)
   220  	}
   221  	currentEpochCommit, err := commits.ByID(protocolState.CurrentEpoch.CommitID)
   222  	if err != nil {
   223  		return nil, fmt.Errorf("could not retrieve current epoch commit: %w", err)
   224  	}
   225  
   226  	// if next epoch has been set up, fill in data for it as well
   227  	nextEpoch := protocolState.NextEpoch
   228  	if nextEpoch != nil {
   229  		nextEpochSetup, err = setups.ByID(nextEpoch.SetupID)
   230  		if err != nil {
   231  			return nil, fmt.Errorf("could not retrieve next epoch's setup event: %w", err)
   232  		}
   233  		if nextEpoch.CommitID != flow.ZeroID {
   234  			nextEpochCommit, err = commits.ByID(nextEpoch.CommitID)
   235  			if err != nil {
   236  				return nil, fmt.Errorf("could not retrieve next epoch's commit event: %w", err)
   237  			}
   238  		}
   239  	}
   240  
   241  	result, err := flow.NewRichProtocolStateEntry(
   242  		protocolState,
   243  		previousEpochSetup,
   244  		previousEpochCommit,
   245  		currentEpochSetup,
   246  		currentEpochCommit,
   247  		nextEpochSetup,
   248  		nextEpochCommit,
   249  	)
   250  	if err != nil {
   251  		// observing an error here would be an indication of severe data corruption or bug in our code since
   252  		// all data should be available and correctly structured at this point.
   253  		return nil, irrecoverable.NewExceptionf("critical failure while instantiating RichProtocolStateEntry: %w", err)
   254  	}
   255  	return result, nil
   256  }