github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/protocol_kv_store.go (about) 1 package badger 2 3 import ( 4 "fmt" 5 6 "github.com/dgraph-io/badger/v2" 7 8 "github.com/onflow/flow-go/model/flow" 9 "github.com/onflow/flow-go/module" 10 "github.com/onflow/flow-go/module/metrics" 11 "github.com/onflow/flow-go/storage" 12 "github.com/onflow/flow-go/storage/badger/operation" 13 "github.com/onflow/flow-go/storage/badger/transaction" 14 ) 15 16 // DefaultProtocolKVStoreCacheSize is the default size for primary protocol KV store cache. 17 // KV store is rarely updated, so we will have a limited number of unique snapshots. 18 // Let's be generous and assume we have 10 different KV stores used at the same time. 19 var DefaultProtocolKVStoreCacheSize uint = 10 20 21 // DefaultProtocolKVStoreByBlockIDCacheSize is the default value for secondary index `byBlockIdCache`. 22 // We want to be able to cover a broad interval of views without cache misses, so we use a bigger value. 23 // Generally, many blocks will reference the same KV store snapshot. 24 var DefaultProtocolKVStoreByBlockIDCacheSize uint = 1000 25 26 // ProtocolKVStore implements persistent storage for storing KV store snapshots. 27 type ProtocolKVStore struct { 28 db *badger.DB 29 30 // cache holds versioned binary blobs representing snapshots of key-value stores. We use the kv-store's 31 // ID as key for retrieving the versioned binary snapshot of the kv-store. Consumers must know how to 32 // deal with the binary representation. `cache` only holds the distinct snapshots. On the happy path, 33 // we expect single-digit number of unique snapshots within an epoch. 34 cache *Cache[flow.Identifier, *flow.PSKeyValueStoreData] 35 36 // byBlockIdCache is essentially an in-memory map from `Block.ID()` -> `KeyValueStore.ID()`. The full 37 // kv-store snapshot can be retrieved from the `cache` above. 38 // `byBlockIdCache` will contain an entry for every block. We want to be able to cover a broad interval of views 39 // without cache misses, so a cache size of roughly 1000 entries is reasonable. 40 byBlockIdCache *Cache[flow.Identifier, flow.Identifier] 41 } 42 43 var _ storage.ProtocolKVStore = (*ProtocolKVStore)(nil) 44 45 // NewProtocolKVStore creates a ProtocolKVStore instance, which is a database holding KV store snapshots. 46 // It supports storing, caching and retrieving by ID or the additionally indexed block ID. 47 func NewProtocolKVStore(collector module.CacheMetrics, 48 db *badger.DB, 49 kvStoreCacheSize uint, 50 kvStoreByBlockIDCacheSize uint, 51 ) *ProtocolKVStore { 52 retrieveByStateID := func(stateID flow.Identifier) func(tx *badger.Txn) (*flow.PSKeyValueStoreData, error) { 53 return func(tx *badger.Txn) (*flow.PSKeyValueStoreData, error) { 54 var kvStore flow.PSKeyValueStoreData 55 err := operation.RetrieveProtocolKVStore(stateID, &kvStore)(tx) 56 if err != nil { 57 return nil, fmt.Errorf("could not get kv snapshot by id (%x): %w", stateID, err) 58 } 59 return &kvStore, nil 60 } 61 } 62 storeByStateID := func(stateID flow.Identifier, data *flow.PSKeyValueStoreData) func(*transaction.Tx) error { 63 return transaction.WithTx(operation.InsertProtocolKVStore(stateID, data)) 64 } 65 66 storeByBlockID := func(blockID flow.Identifier, stateID flow.Identifier) func(*transaction.Tx) error { 67 return func(tx *transaction.Tx) error { 68 err := transaction.WithTx(operation.IndexProtocolKVStore(blockID, stateID))(tx) 69 if err != nil { 70 return fmt.Errorf("could not index protocol state for block (%x): %w", blockID[:], err) 71 } 72 return nil 73 } 74 } 75 76 retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (flow.Identifier, error) { 77 return func(tx *badger.Txn) (flow.Identifier, error) { 78 var stateID flow.Identifier 79 err := operation.LookupProtocolKVStore(blockID, &stateID)(tx) 80 if err != nil { 81 return flow.ZeroID, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) 82 } 83 return stateID, nil 84 } 85 } 86 87 return &ProtocolKVStore{ 88 db: db, 89 cache: newCache[flow.Identifier, *flow.PSKeyValueStoreData](collector, metrics.ResourceProtocolKVStore, 90 withLimit[flow.Identifier, *flow.PSKeyValueStoreData](kvStoreCacheSize), 91 withStore(storeByStateID), 92 withRetrieve(retrieveByStateID)), 93 byBlockIdCache: newCache[flow.Identifier, flow.Identifier](collector, metrics.ResourceProtocolKVStoreByBlockID, 94 withLimit[flow.Identifier, flow.Identifier](kvStoreByBlockIDCacheSize), 95 withStore(storeByBlockID), 96 withRetrieve(retrieveByBlockID)), 97 } 98 } 99 100 // StoreTx returns an anonymous function (intended to be executed as part of a badger transaction), 101 // which persists the given KV-store snapshot as part of a DB tx. 102 // Expected errors of the returned anonymous function: 103 // - storage.ErrAlreadyExists if a KV-store snapshot with the given id is already stored. 104 func (s *ProtocolKVStore) StoreTx(stateID flow.Identifier, data *flow.PSKeyValueStoreData) func(*transaction.Tx) error { 105 return s.cache.PutTx(stateID, data) 106 } 107 108 // IndexTx returns an anonymous function intended to be executed as part of a database transaction. 109 // In a nutshell, we want to maintain a map from `blockID` to `stateID`, where `blockID` references the 110 // block that _proposes_ updated key-value store. 111 // Upon call, the anonymous function persists the specific map entry in the node's database. 112 // Protocol convention: 113 // - Consider block B, whose ingestion might potentially lead to an updated KV store. For example, 114 // the KV store changes if we seal some execution results emitting specific service events. 115 // - For the key `blockID`, we use the identity of block B which _proposes_ this updated KV store. 116 // - CAUTION: The updated state requires confirmation by a QC and will only become active at the child block, 117 // _after_ validating the QC. 118 // 119 // Expected errors during normal operations: 120 // - storage.ErrAlreadyExists if a KV store for the given blockID has already been indexed. 121 func (s *ProtocolKVStore) IndexTx(blockID flow.Identifier, stateID flow.Identifier) func(*transaction.Tx) error { 122 return s.byBlockIdCache.PutTx(blockID, stateID) 123 } 124 125 // ByID retrieves the KV store snapshot with the given ID. 126 // Expected errors during normal operations: 127 // - storage.ErrNotFound if no snapshot with the given Identifier is known. 128 func (s *ProtocolKVStore) ByID(id flow.Identifier) (*flow.PSKeyValueStoreData, error) { 129 tx := s.db.NewTransaction(false) 130 defer tx.Discard() 131 return s.cache.Get(id)(tx) 132 } 133 134 // ByBlockID retrieves the kv-store snapshot that the block with the given ID proposes. 135 // CAUTION: this store snapshot requires confirmation by a QC and will only become active at the child block, 136 // _after_ validating the QC. Protocol convention: 137 // - Consider block B, whose ingestion might potentially lead to an updated KV store state. 138 // For example, the state changes if we seal some execution results emitting specific service events. 139 // - For the key `blockID`, we use the identity of block B which _proposes_ this updated KV store. As value, 140 // the hash of the resulting state at the end of processing B is to be used. 141 // - CAUTION: The updated state requires confirmation by a QC and will only become active at the child block, 142 // _after_ validating the QC. 143 // 144 // Expected errors during normal operations: 145 // - storage.ErrNotFound if no snapshot has been indexed for the given block. 146 func (s *ProtocolKVStore) ByBlockID(blockID flow.Identifier) (*flow.PSKeyValueStoreData, error) { 147 tx := s.db.NewTransaction(false) 148 defer tx.Discard() 149 stateID, err := s.byBlockIdCache.Get(blockID)(tx) 150 if err != nil { 151 return nil, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) 152 } 153 return s.cache.Get(stateID)(tx) 154 }