github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/protocol_kv_store.go (about)

     1  package badger
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/dgraph-io/badger/v2"
     7  
     8  	"github.com/onflow/flow-go/model/flow"
     9  	"github.com/onflow/flow-go/module"
    10  	"github.com/onflow/flow-go/module/metrics"
    11  	"github.com/onflow/flow-go/storage"
    12  	"github.com/onflow/flow-go/storage/badger/operation"
    13  	"github.com/onflow/flow-go/storage/badger/transaction"
    14  )
    15  
    16  // DefaultProtocolKVStoreCacheSize is the default size for primary protocol KV store cache.
    17  // KV store is rarely updated, so we will have a limited number of unique snapshots.
    18  // Let's be generous and assume we have 10 different KV stores used at the same time.
    19  var DefaultProtocolKVStoreCacheSize uint = 10
    20  
    21  // DefaultProtocolKVStoreByBlockIDCacheSize is the default value for secondary index `byBlockIdCache`.
    22  // We want to be able to cover a broad interval of views without cache misses, so we use a bigger value.
    23  // Generally, many blocks will reference the same KV store snapshot.
    24  var DefaultProtocolKVStoreByBlockIDCacheSize uint = 1000
    25  
// ProtocolKVStore implements persistent storage for storing KV store snapshots.
// It combines a badger database with two in-memory caches: one holding the snapshots themselves
// (keyed by snapshot ID) and one holding the index from block ID to snapshot ID.
type ProtocolKVStore struct {
	// db is the badger database used to open the read transactions in ByID and ByBlockID.
	db *badger.DB

	// cache holds versioned binary blobs representing snapshots of key-value stores. The kv-store's
	// ID is the key for retrieving the versioned binary snapshot of the kv-store. Consumers must know
	// how to deal with the binary representation. `cache` only holds the distinct snapshots. On the
	// happy path, a single-digit number of unique snapshots is expected within an epoch.
	cache *Cache[flow.Identifier, *flow.PSKeyValueStoreData]

	// byBlockIdCache is essentially an in-memory map from `Block.ID()` -> `KeyValueStore.ID()`. The full
	// kv-store snapshot can be retrieved from the `cache` above.
	// `byBlockIdCache` will contain an entry for every block. To cover a broad interval of views
	// without cache misses, a cache size of roughly 1000 entries is reasonable.
	byBlockIdCache *Cache[flow.Identifier, flow.Identifier]
}

// Compile-time check that *ProtocolKVStore satisfies the storage.ProtocolKVStore interface.
var _ storage.ProtocolKVStore = (*ProtocolKVStore)(nil)
    44  
    45  // NewProtocolKVStore creates a ProtocolKVStore instance, which is a database holding KV store snapshots.
    46  // It supports storing, caching and retrieving by ID or the additionally indexed block ID.
    47  func NewProtocolKVStore(collector module.CacheMetrics,
    48  	db *badger.DB,
    49  	kvStoreCacheSize uint,
    50  	kvStoreByBlockIDCacheSize uint,
    51  ) *ProtocolKVStore {
    52  	retrieveByStateID := func(stateID flow.Identifier) func(tx *badger.Txn) (*flow.PSKeyValueStoreData, error) {
    53  		return func(tx *badger.Txn) (*flow.PSKeyValueStoreData, error) {
    54  			var kvStore flow.PSKeyValueStoreData
    55  			err := operation.RetrieveProtocolKVStore(stateID, &kvStore)(tx)
    56  			if err != nil {
    57  				return nil, fmt.Errorf("could not get kv snapshot by id (%x): %w", stateID, err)
    58  			}
    59  			return &kvStore, nil
    60  		}
    61  	}
    62  	storeByStateID := func(stateID flow.Identifier, data *flow.PSKeyValueStoreData) func(*transaction.Tx) error {
    63  		return transaction.WithTx(operation.InsertProtocolKVStore(stateID, data))
    64  	}
    65  
    66  	storeByBlockID := func(blockID flow.Identifier, stateID flow.Identifier) func(*transaction.Tx) error {
    67  		return func(tx *transaction.Tx) error {
    68  			err := transaction.WithTx(operation.IndexProtocolKVStore(blockID, stateID))(tx)
    69  			if err != nil {
    70  				return fmt.Errorf("could not index protocol state for block (%x): %w", blockID[:], err)
    71  			}
    72  			return nil
    73  		}
    74  	}
    75  
    76  	retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (flow.Identifier, error) {
    77  		return func(tx *badger.Txn) (flow.Identifier, error) {
    78  			var stateID flow.Identifier
    79  			err := operation.LookupProtocolKVStore(blockID, &stateID)(tx)
    80  			if err != nil {
    81  				return flow.ZeroID, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err)
    82  			}
    83  			return stateID, nil
    84  		}
    85  	}
    86  
    87  	return &ProtocolKVStore{
    88  		db: db,
    89  		cache: newCache[flow.Identifier, *flow.PSKeyValueStoreData](collector, metrics.ResourceProtocolKVStore,
    90  			withLimit[flow.Identifier, *flow.PSKeyValueStoreData](kvStoreCacheSize),
    91  			withStore(storeByStateID),
    92  			withRetrieve(retrieveByStateID)),
    93  		byBlockIdCache: newCache[flow.Identifier, flow.Identifier](collector, metrics.ResourceProtocolKVStoreByBlockID,
    94  			withLimit[flow.Identifier, flow.Identifier](kvStoreByBlockIDCacheSize),
    95  			withStore(storeByBlockID),
    96  			withRetrieve(retrieveByBlockID)),
    97  	}
    98  }
    99  
   100  // StoreTx returns an anonymous function (intended to be executed as part of a badger transaction),
   101  // which persists the given KV-store snapshot as part of a DB tx.
   102  // Expected errors of the returned anonymous function:
   103  //   - storage.ErrAlreadyExists if a KV-store snapshot with the given id is already stored.
   104  func (s *ProtocolKVStore) StoreTx(stateID flow.Identifier, data *flow.PSKeyValueStoreData) func(*transaction.Tx) error {
   105  	return s.cache.PutTx(stateID, data)
   106  }
   107  
   108  // IndexTx returns an anonymous function intended to be executed as part of a database transaction.
   109  // In a nutshell, we want to maintain a map from `blockID` to `stateID`, where `blockID` references the
   110  // block that _proposes_ updated key-value store.
   111  // Upon call, the anonymous function persists the specific map entry in the node's database.
   112  // Protocol convention:
   113  //   - Consider block B, whose ingestion might potentially lead to an updated KV store. For example,
   114  //     the KV store changes if we seal some execution results emitting specific service events.
   115  //   - For the key `blockID`, we use the identity of block B which _proposes_ this updated KV store.
   116  //   - CAUTION: The updated state requires confirmation by a QC and will only become active at the child block,
   117  //     _after_ validating the QC.
   118  //
   119  // Expected errors during normal operations:
   120  //   - storage.ErrAlreadyExists if a KV store for the given blockID has already been indexed.
   121  func (s *ProtocolKVStore) IndexTx(blockID flow.Identifier, stateID flow.Identifier) func(*transaction.Tx) error {
   122  	return s.byBlockIdCache.PutTx(blockID, stateID)
   123  }
   124  
   125  // ByID retrieves the KV store snapshot with the given ID.
   126  // Expected errors during normal operations:
   127  //   - storage.ErrNotFound if no snapshot with the given Identifier is known.
   128  func (s *ProtocolKVStore) ByID(id flow.Identifier) (*flow.PSKeyValueStoreData, error) {
   129  	tx := s.db.NewTransaction(false)
   130  	defer tx.Discard()
   131  	return s.cache.Get(id)(tx)
   132  }
   133  
   134  // ByBlockID retrieves the kv-store snapshot that the block with the given ID proposes.
   135  // CAUTION: this store snapshot requires confirmation by a QC and will only become active at the child block,
   136  // _after_ validating the QC. Protocol convention:
   137  //   - Consider block B, whose ingestion might potentially lead to an updated KV store state.
   138  //     For example, the state changes if we seal some execution results emitting specific service events.
   139  //   - For the key `blockID`, we use the identity of block B which _proposes_ this updated KV store. As value,
   140  //     the hash of the resulting state at the end of processing B is to be used.
   141  //   - CAUTION: The updated state requires confirmation by a QC and will only become active at the child block,
   142  //     _after_ validating the QC.
   143  //
   144  // Expected errors during normal operations:
   145  //   - storage.ErrNotFound if no snapshot has been indexed for the given block.
   146  func (s *ProtocolKVStore) ByBlockID(blockID flow.Identifier) (*flow.PSKeyValueStoreData, error) {
   147  	tx := s.db.NewTransaction(false)
   148  	defer tx.Discard()
   149  	stateID, err := s.byBlockIdCache.Get(blockID)(tx)
   150  	if err != nil {
   151  		return nil, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err)
   152  	}
   153  	return s.cache.Get(stateID)(tx)
   154  }