github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/protocol_state.go (about) 1 package badger 2 3 import ( 4 "fmt" 5 6 "github.com/dgraph-io/badger/v2" 7 8 "github.com/onflow/flow-go/model/flow" 9 "github.com/onflow/flow-go/module" 10 "github.com/onflow/flow-go/module/irrecoverable" 11 "github.com/onflow/flow-go/module/metrics" 12 "github.com/onflow/flow-go/storage" 13 "github.com/onflow/flow-go/storage/badger/operation" 14 "github.com/onflow/flow-go/storage/badger/transaction" 15 ) 16 17 // DefaultProtocolStateCacheSize is the default size for primary protocol state cache. 18 // Minimally, we have 3 entries per epoch (one on epoch Switchover, one on receiving the Epoch Setup and one when seeing the Epoch Commit event). 19 // Lets be generous and assume we have 20 different Protocol States per epoch. 20 var DefaultProtocolStateCacheSize uint = 20 21 22 // DefaultProtocolStateByBlockIDCacheSize is the default value for secondary byBlockIdCache. 23 // We want to be able to cover a broad interval of views without cache misses, so we use a bigger value. 24 var DefaultProtocolStateByBlockIDCacheSize uint = 1000 25 26 // ProtocolState implements persistent storage for storing Protocol States. 27 // Protocol state uses an embedded cache without storing capabilities(store happens on first retrieval) to avoid unnecessary 28 // operations and to speed up access to frequently used Protocol State. 29 type ProtocolState struct { 30 db *badger.DB 31 32 // cache is essentially an in-memory map from `ProtocolStateEntry.ID()` -> `RichProtocolStateEntry` 33 // We do _not_ populate this cache which holds the RichProtocolStateEntrys on store. This is because 34 // (i) we don't have the RichProtocolStateEntry on store readily available and 35 // (ii) new RichProtocolStateEntry are really rare throughout an epoch, so the total cost of populating 36 // the cache becomes negligible over several views. 37 // In the future, we might want to populate the cache on store, if we want to maintain frequently-changing 38 // information in the protocol state, like the latest sealed block. This should be a smaller amount of work, 39 // because the `ProtocolStateEntry` is generated by `StateMutator.Build()`. The `StateMutator` should already 40 // have the needed Epoch Setup and Commit events, since it starts with a RichProtocolStateEntry for the parent 41 // state and consumes Epoch Setup and Epoch Commit events. Though, we leave this optimization for later. 42 // 43 // `cache` only holds the distinct Protocol States. On the happy path, we expect something like 3 entries per epoch. 44 // On the optimal happy path we have 3 entries per epoch: one entry on epoch Switchover, one on receiving the Epoch Setup 45 // and one when seeing the Epoch Commit event. Let's be generous and assume we have 20 different Protocol States per epoch. 46 // Beyond that, we are certainly leaving the domain of normal operations that we optimize for. Therefore, a cache size of 47 // roughly 100 is a reasonable balance between performance and memory consumption. 48 cache *Cache[flow.Identifier, *flow.RichProtocolStateEntry] 49 50 // byBlockIdCache is essentially an in-memory map from `Block.ID()` -> `ProtocolStateEntry.ID()`. The full 51 // Protocol state can be retrieved from the `cache` above. 52 // We populate the `byBlockIdCache` on store, because a new entry is added for every block and we probably also 53 // query the Protocol state for every block. So argument (ii) from above does not apply here. Furthermore, 54 // argument (i) from above also does not apply, because we already have the Protocol State's ID on store, 55 // so populating the cache is easy. 56 // 57 // `byBlockIdCache` will contain an entry for every block. We want to be able to cover a broad interval of views 58 // without cache misses, so a cache size of roughly 1000 entries is reasonable. 59 byBlockIdCache *Cache[flow.Identifier, flow.Identifier] 60 } 61 62 var _ storage.ProtocolState = (*ProtocolState)(nil) 63 64 // NewProtocolState creates a ProtocolState instance, which is a database of Protocol State. 65 // It supports storing, caching and retrieving by ID or the additionally indexed block ID. 66 func NewProtocolState(collector module.CacheMetrics, 67 epochSetups storage.EpochSetups, 68 epochCommits storage.EpochCommits, 69 db *badger.DB, 70 stateCacheSize uint, 71 stateByBlockIDCacheSize uint, 72 ) *ProtocolState { 73 retrieveByProtocolStateID := func(protocolStateID flow.Identifier) func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { 74 var protocolStateEntry flow.ProtocolStateEntry 75 return func(tx *badger.Txn) (*flow.RichProtocolStateEntry, error) { 76 err := operation.RetrieveProtocolState(protocolStateID, &protocolStateEntry)(tx) 77 if err != nil { 78 return nil, err 79 } 80 result, err := newRichProtocolStateEntry(&protocolStateEntry, epochSetups, epochCommits) 81 if err != nil { 82 return nil, fmt.Errorf("could not create rich protocol state entry: %w", err) 83 } 84 return result, nil 85 } 86 } 87 88 storeByBlockID := func(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error { 89 return func(tx *transaction.Tx) error { 90 err := transaction.WithTx(operation.IndexProtocolState(blockID, protocolStateID))(tx) 91 if err != nil { 92 return fmt.Errorf("could not index protocol state for block (%x): %w", blockID[:], err) 93 } 94 return nil 95 } 96 } 97 98 retrieveByBlockID := func(blockID flow.Identifier) func(tx *badger.Txn) (flow.Identifier, error) { 99 return func(tx *badger.Txn) (flow.Identifier, error) { 100 var protocolStateID flow.Identifier 101 err := operation.LookupProtocolState(blockID, &protocolStateID)(tx) 102 if err != nil { 103 return flow.ZeroID, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) 104 } 105 return protocolStateID, nil 106 } 107 } 108 109 return &ProtocolState{ 110 db: db, 111 cache: newCache[flow.Identifier, *flow.RichProtocolStateEntry](collector, metrics.ResourceProtocolState, 112 withLimit[flow.Identifier, *flow.RichProtocolStateEntry](stateCacheSize), 113 withStore(noopStore[flow.Identifier, *flow.RichProtocolStateEntry]), 114 withRetrieve(retrieveByProtocolStateID)), 115 byBlockIdCache: newCache[flow.Identifier, flow.Identifier](collector, metrics.ResourceProtocolStateByBlockID, 116 withLimit[flow.Identifier, flow.Identifier](stateByBlockIDCacheSize), 117 withStore(storeByBlockID), 118 withRetrieve(retrieveByBlockID)), 119 } 120 } 121 122 // StoreTx returns an anonymous function (intended to be executed as part of a badger transaction), 123 // which persists the given protocol state as part of a DB tx. Per convention, the identities in 124 // the Protocol State must be in canonical order for the current and next epoch (if present), 125 // otherwise an exception is returned. 126 // Expected errors of the returned anonymous function: 127 // - storage.ErrAlreadyExists if a Protocol State with the given id is already stored 128 func (s *ProtocolState) StoreTx(protocolStateID flow.Identifier, protocolState *flow.ProtocolStateEntry) func(*transaction.Tx) error { 129 // front-load sanity checks: 130 if !protocolState.CurrentEpoch.ActiveIdentities.Sorted(flow.IdentifierCanonical) { 131 return transaction.Fail(fmt.Errorf("sanity check failed: identities are not sorted")) 132 } 133 if protocolState.NextEpoch != nil && !protocolState.NextEpoch.ActiveIdentities.Sorted(flow.IdentifierCanonical) { 134 return transaction.Fail(fmt.Errorf("sanity check failed: next epoch identities are not sorted")) 135 } 136 137 // happy path: return anonymous function, whose future execution (as part of a transaction) will store the protocolState 138 return transaction.WithTx(operation.InsertProtocolState(protocolStateID, protocolState)) 139 } 140 141 // Index returns an anonymous function that is intended to be executed as part of a database transaction. 142 // In a nutshell, we want to maintain a map from `blockID` to `protocolStateID`, where `blockID` references the 143 // block that _proposes_ the Protocol State. 144 // Upon call, the anonymous function persists the specific map entry in the node's database. 145 // Protocol convention: 146 // - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, 147 // the protocol state changes if we seal some execution results emitting service events. 148 // - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, 149 // the hash of the resulting protocol state at the end of processing B is to be used. 150 // - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, 151 // _after_ validating the QC. 152 // 153 // Expected errors during normal operations: 154 // - storage.ErrAlreadyExists if a Protocol State for the given blockID has already been indexed 155 func (s *ProtocolState) Index(blockID flow.Identifier, protocolStateID flow.Identifier) func(*transaction.Tx) error { 156 return s.byBlockIdCache.PutTx(blockID, protocolStateID) 157 } 158 159 // ByID returns the protocol state by its ID. 160 // Expected errors during normal operations: 161 // - storage.ErrNotFound if no protocol state with the given Identifier is known. 162 func (s *ProtocolState) ByID(protocolStateID flow.Identifier) (*flow.RichProtocolStateEntry, error) { 163 tx := s.db.NewTransaction(false) 164 defer tx.Discard() 165 return s.cache.Get(protocolStateID)(tx) 166 } 167 168 // ByBlockID retrieves the Protocol State that the block with the given ID proposes. 169 // CAUTION: this protocol state requires confirmation by a QC and will only become active at the child block, 170 // _after_ validating the QC. Protocol convention: 171 // - Consider block B, whose ingestion might potentially lead to an updated protocol state. For example, 172 // the protocol state changes if we seal some execution results emitting service events. 173 // - For the key `blockID`, we use the identity of block B which _proposes_ this Protocol State. As value, 174 // the hash of the resulting protocol state at the end of processing B is to be used. 175 // - CAUTION: The protocol state requires confirmation by a QC and will only become active at the child block, 176 // _after_ validating the QC. 177 // 178 // Expected errors during normal operations: 179 // - storage.ErrNotFound if no protocol state has been indexed for the given block. 180 func (s *ProtocolState) ByBlockID(blockID flow.Identifier) (*flow.RichProtocolStateEntry, error) { 181 tx := s.db.NewTransaction(false) 182 defer tx.Discard() 183 protocolStateID, err := s.byBlockIdCache.Get(blockID)(tx) 184 if err != nil { 185 return nil, fmt.Errorf("could not lookup protocol state ID for block (%x): %w", blockID[:], err) 186 } 187 return s.cache.Get(protocolStateID)(tx) 188 } 189 190 // newRichProtocolStateEntry constructs a rich protocol state entry from a protocol state entry. 191 // It queries and fills in epoch setups and commits for previous and current epochs and possibly next epoch. 192 // No errors are expected during normal operation. 193 func newRichProtocolStateEntry( 194 protocolState *flow.ProtocolStateEntry, 195 setups storage.EpochSetups, 196 commits storage.EpochCommits, 197 ) (*flow.RichProtocolStateEntry, error) { 198 var ( 199 previousEpochSetup *flow.EpochSetup 200 previousEpochCommit *flow.EpochCommit 201 nextEpochSetup *flow.EpochSetup 202 nextEpochCommit *flow.EpochCommit 203 err error 204 ) 205 // query and fill in epoch setups and commits for previous and current epochs 206 if protocolState.PreviousEpoch != nil { 207 previousEpochSetup, err = setups.ByID(protocolState.PreviousEpoch.SetupID) 208 if err != nil { 209 return nil, fmt.Errorf("could not retrieve previous epoch setup: %w", err) 210 } 211 previousEpochCommit, err = commits.ByID(protocolState.PreviousEpoch.CommitID) 212 if err != nil { 213 return nil, fmt.Errorf("could not retrieve previous epoch commit: %w", err) 214 } 215 } 216 217 currentEpochSetup, err := setups.ByID(protocolState.CurrentEpoch.SetupID) 218 if err != nil { 219 return nil, fmt.Errorf("could not retrieve current epoch setup: %w", err) 220 } 221 currentEpochCommit, err := commits.ByID(protocolState.CurrentEpoch.CommitID) 222 if err != nil { 223 return nil, fmt.Errorf("could not retrieve current epoch commit: %w", err) 224 } 225 226 // if next epoch has been set up, fill in data for it as well 227 nextEpoch := protocolState.NextEpoch 228 if nextEpoch != nil { 229 nextEpochSetup, err = setups.ByID(nextEpoch.SetupID) 230 if err != nil { 231 return nil, fmt.Errorf("could not retrieve next epoch's setup event: %w", err) 232 } 233 if nextEpoch.CommitID != flow.ZeroID { 234 nextEpochCommit, err = commits.ByID(nextEpoch.CommitID) 235 if err != nil { 236 return nil, fmt.Errorf("could not retrieve next epoch's commit event: %w", err) 237 } 238 } 239 } 240 241 result, err := flow.NewRichProtocolStateEntry( 242 protocolState, 243 previousEpochSetup, 244 previousEpochCommit, 245 currentEpochSetup, 246 currentEpochCommit, 247 nextEpochSetup, 248 nextEpochCommit, 249 ) 250 if err != nil { 251 // observing an error here would be an indication of severe data corruption or bug in our code since 252 // all data should be available and correctly structured at this point. 253 return nil, irrecoverable.NewExceptionf("critical failure while instantiating RichProtocolStateEntry: %w", err) 254 } 255 return result, nil 256 }