github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/ledger/complete/mtrie/forest.go

package mtrie

import (
	"fmt"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/common/hash"
	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
	"github.com/onflow/flow-go/module"
)

// Forest holds several in-memory tries. As Forest is a storage-abstraction layer,
// we assume that all registers are addressed via paths of pre-defined uniform length.
//
// Forest has a limit, the forestCapacity, on the number of tries it is able to store.
// If more tries are added than the capacity, the Least Recently Added trie is
// removed (evicted) from the Forest. THIS IS A ROUGH HEURISTIC, as it might evict
// tries that are still needed. In fully matured Flow, we will have an
// explicit eviction policy.
//
// TODO: Storage Eviction Policy for Forest
// For the execution node: we only evict on sealing a result.
type Forest struct {
	// tries stores all MTries in the forest. It is NOT a CACHE in the conventional sense:
	// there is no mechanism to load a trie from disk in case of a cache miss. Missing a
	// needed trie in the forest might cause a fatal application logic error.
	tries          *TrieCache
	forestCapacity int
	onTreeEvicted  func(tree *trie.MTrie)
	metrics        module.LedgerMetrics
}

// NewForest returns a new instance of memory forest.
//
// CAUTION on forestCapacity: the specified capacity MUST be SUFFICIENT to store all needed MTries in the forest.
// If more tries are added than the capacity, the Least Recently Added trie is removed (evicted) from the Forest (FIFO queue).
// Make sure you choose a sufficiently large forestCapacity, such that, when reaching the capacity, the
// Least Recently Added trie will never be needed again.
func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted func(tree *trie.MTrie)) (*Forest, error) {
	forest := &Forest{
		tries:          NewTrieCache(uint(forestCapacity), onTreeEvicted),
		forestCapacity: forestCapacity,
		onTreeEvicted:  onTreeEvicted,
		metrics:        metrics,
	}

	// add trie with no allocated registers
	emptyTrie := trie.NewEmptyMTrie()
	err := forest.AddTrie(emptyTrie)
	if err != nil {
		return nil, fmt.Errorf("adding empty trie to forest failed: %w", err)
	}
	return forest, nil
}
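// Example (usage sketch, not part of the upstream file): constructing a Forest
// and writing one register starting from the empty trie. This assumes
// metrics.NewNoopCollector() from "github.com/onflow/flow-go/module/metrics"
// satisfies module.LedgerMetrics; the eviction callback may be nil if evicted
// tries need no special handling. path and payload are placeholders for a
// ledger.Path and a *ledger.Payload.
//
//	forest, err := mtrie.NewForest(1000, metrics.NewNoopCollector(), nil)
//	if err != nil {
//		return err
//	}
//	newRoot, err := forest.Update(&ledger.TrieUpdate{
//		RootHash: forest.GetEmptyRootHash(),
//		Paths:    []ledger.Path{path},
//		Payloads: []*ledger.Payload{payload},
//	})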
// ValueSizes returns value sizes for a slice of paths and error (if any)
// TODO: can be optimized further if we don't care about changing the order of the input r.Paths
func (f *Forest) ValueSizes(r *ledger.TrieRead) ([]int, error) {

	if len(r.Paths) == 0 {
		return []int{}, nil
	}

	// lookup the trie by rootHash
	trie, err := f.GetTrie(r.RootHash)
	if err != nil {
		return nil, err
	}

	// deduplicate paths:
	// Generally, we expect the VM to deduplicate reads and writes. Hence, the following is a precaution.
	// TODO: We could take out the following de-duplication logic,
	// which increases the cost for duplicates but reduces ValueSizes complexity without duplicates.
	deduplicatedPaths := make([]ledger.Path, 0, len(r.Paths))
	pathOrgIndex := make(map[ledger.Path][]int)
	for i, path := range r.Paths {
		// only collect duplicated paths once
		indices, ok := pathOrgIndex[path]
		if !ok { // deduplication here is optional
			deduplicatedPaths = append(deduplicatedPaths, path)
		}
		// append the index
		pathOrgIndex[path] = append(indices, i)
	}

	sizes := trie.UnsafeValueSizes(deduplicatedPaths) // this sorts deduplicatedPaths IN-PLACE

	// reconstruct value sizes in the same order as the paths of the original query
	orderedValueSizes := make([]int, len(r.Paths))
	totalValueSize := 0
	for i, p := range deduplicatedPaths {
		size := sizes[i]
		indices := pathOrgIndex[p]
		for _, j := range indices {
			orderedValueSizes[j] = size
		}
		totalValueSize += len(indices) * size
	}
	// TODO: rename the metrics
	f.metrics.ReadValuesSize(uint64(totalValueSize))

	return orderedValueSizes, nil
}

// ReadSingleValue reads the value for a single path and returns the value and error (if any)
func (f *Forest) ReadSingleValue(r *ledger.TrieReadSingleValue) (ledger.Value, error) {
	// lookup the trie by rootHash
	trie, err := f.GetTrie(r.RootHash)
	if err != nil {
		return nil, err
	}

	payload := trie.ReadSinglePayload(r.Path)
	return payload.Value().DeepCopy(), nil
}

// Read reads values for a slice of paths and returns values and error (if any)
// TODO: can be optimized further if we don't care about changing the order of the input r.Paths
func (f *Forest) Read(r *ledger.TrieRead) ([]ledger.Value, error) {

	if len(r.Paths) == 0 {
		return []ledger.Value{}, nil
	}

	// lookup the trie by rootHash
	trie, err := f.GetTrie(r.RootHash)
	if err != nil {
		return nil, err
	}

	// call ReadSinglePayload if there is only one path
	if len(r.Paths) == 1 {
		payload := trie.ReadSinglePayload(r.Paths[0])
		return []ledger.Value{payload.Value().DeepCopy()}, nil
	}

	// deduplicate paths:
	// Generally, we expect the VM to deduplicate reads and writes. Hence, the following is a precaution.
	// TODO: We could take out the following de-duplication logic,
	// which increases the cost for duplicates but reduces read complexity without duplicates.
	deduplicatedPaths := make([]ledger.Path, 0, len(r.Paths))
	pathOrgIndex := make(map[ledger.Path][]int)
	for i, path := range r.Paths {
		// only collect duplicated paths once
		indices, ok := pathOrgIndex[path]
		if !ok { // deduplication here is optional
			deduplicatedPaths = append(deduplicatedPaths, path)
		}
		// append the index
		pathOrgIndex[path] = append(indices, i)
	}

	payloads := trie.UnsafeRead(deduplicatedPaths) // this sorts deduplicatedPaths IN-PLACE

	// reconstruct the payloads in the same order as the paths of the original query
	orderedValues := make([]ledger.Value, len(r.Paths))
	totalPayloadSize := 0
	for i, p := range deduplicatedPaths {
		payload := payloads[i]
		indices := pathOrgIndex[p]
		for _, j := range indices {
			orderedValues[j] = payload.Value().DeepCopy()
		}
		totalPayloadSize += len(indices) * payload.Size()
	}
	// TODO: rename the metrics
	f.metrics.ReadValuesSize(uint64(totalPayloadSize))

	return orderedValues, nil
}
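// Example (illustrative sketch): Read preserves the order of the caller's
// paths, including duplicates, even though the underlying trie lookup sorts
// its deduplicated input in-place. p1 and p2 below are placeholder
// ledger.Path values.
//
//	values, err := forest.Read(&ledger.TrieRead{
//		RootHash: root,
//		Paths:    []ledger.Path{p1, p2, p1},
//	})
//	// values[0] and values[2] hold the register at p1; values[1] the one at p2.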
// Update creates a new trie by updating the values of registers in the parent trie,
// adds the new trie to the forest, and returns the rootHash and error (if any).
// In case there are multiple updates to the same register, Update will persist
// the latest written value.
// Note: Update adds the new trie to the forest, unlike NewTrie().
func (f *Forest) Update(u *ledger.TrieUpdate) (ledger.RootHash, error) {
	t, err := f.NewTrie(u)
	if err != nil {
		return ledger.RootHash(hash.DummyHash), err
	}

	err = f.AddTrie(t)
	if err != nil {
		return ledger.RootHash(hash.DummyHash), fmt.Errorf("adding updated trie to forest failed: %w", err)
	}

	return t.RootHash(), nil
}

// NewTrie creates a new trie by updating the values of registers in the parent trie,
// and returns the new trie and error (if any).
// In case there are multiple updates to the same register, NewTrie will persist
// the latest written value.
// Note: NewTrie doesn't add the new trie to the forest, unlike Update().
func (f *Forest) NewTrie(u *ledger.TrieUpdate) (*trie.MTrie, error) {

	parentTrie, err := f.GetTrie(u.RootHash)
	if err != nil {
		return nil, err
	}

	if len(u.Paths) == 0 { // no paths, no change
		return parentTrie, nil
	}

	// Deduplicate writes to the same register: we only retain the value of the last write.
	// Generally, we expect the VM to deduplicate reads and writes.
	deduplicatedPaths := make([]ledger.Path, 0, len(u.Paths))
	deduplicatedPayloads := make([]ledger.Payload, 0, len(u.Paths))
	payloadMap := make(map[ledger.Path]int) // index into deduplicatedPaths, deduplicatedPayloads with register update
	totalPayloadSize := 0
	for i, path := range u.Paths {
		payload := u.Payloads[i]
		// check if we have already encountered an update for the respective register
		if idx, ok := payloadMap[path]; ok {
			oldPayload := deduplicatedPayloads[idx]
			deduplicatedPayloads[idx] = *payload
			totalPayloadSize += -oldPayload.Size() + payload.Size()
		} else {
			payloadMap[path] = len(deduplicatedPaths)
			deduplicatedPaths = append(deduplicatedPaths, path)
			deduplicatedPayloads = append(deduplicatedPayloads, *u.Payloads[i])
			totalPayloadSize += payload.Size()
		}
	}

	// Update metrics with the number of updated payloads and the size of updated payloads.
	// TODO: rename metrics names
	f.metrics.UpdateValuesNumber(uint64(len(deduplicatedPayloads)))
	f.metrics.UpdateValuesSize(uint64(totalPayloadSize))

	// apply pruning on update
	applyPruning := true
	newTrie, maxDepthTouched, err := trie.NewTrieWithUpdatedRegisters(parentTrie, deduplicatedPaths, deduplicatedPayloads, applyPruning)
	if err != nil {
		return nil, fmt.Errorf("constructing updated trie failed: %w", err)
	}

	f.metrics.LatestTrieRegCount(newTrie.AllocatedRegCount())
	f.metrics.LatestTrieRegCountDiff(int64(newTrie.AllocatedRegCount() - parentTrie.AllocatedRegCount()))
	f.metrics.LatestTrieRegSize(newTrie.AllocatedRegSize())
	f.metrics.LatestTrieRegSizeDiff(int64(newTrie.AllocatedRegSize() - parentTrie.AllocatedRegSize()))
	f.metrics.LatestTrieMaxDepthTouched(maxDepthTouched)

	return newTrie, nil
}
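// Example (illustrative sketch): when the same path appears twice in one
// TrieUpdate, only the last payload is persisted. path, payloadV1 and
// payloadV2 are placeholders for a ledger.Path and two *ledger.Payload
// values (e.g. constructed via ledger.NewPayload).
//
//	newRoot, err := forest.Update(&ledger.TrieUpdate{
//		RootHash: root,
//		Paths:    []ledger.Path{path, path},
//		Payloads: []*ledger.Payload{payloadV1, payloadV2},
//	})
//	// a subsequent ReadSingleValue at newRoot returns payloadV2's value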
// Proofs returns a batch proof for the given paths.
//
// Proofs are generally _not_ provided in the register order of the query.
// In the current implementation, the input paths in the TrieRead `r` are sorted in ascending order,
// and the output proofs follow the order of the sorted paths.
func (f *Forest) Proofs(r *ledger.TrieRead) (*ledger.TrieBatchProof, error) {

	// no paths, empty batch proof
	if len(r.Paths) == 0 {
		return ledger.NewTrieBatchProof(), nil
	}

	// look up non-existent paths
	retValueSizes, err := f.ValueSizes(r)
	if err != nil {
		return nil, err
	}

	notFoundPaths := make([]ledger.Path, 0)
	notFoundPayloads := make([]ledger.Payload, 0)
	for i, path := range r.Paths {
		// add if empty
		if retValueSizes[i] == 0 {
			notFoundPaths = append(notFoundPaths, path)
			notFoundPayloads = append(notFoundPayloads, *ledger.EmptyPayload())
		}
	}

	stateTrie, err := f.GetTrie(r.RootHash)
	if err != nil {
		return nil, err
	}

	// if we have to insert empty values
	if len(notFoundPaths) > 0 {
		// For proofs, pruning must be disabled: currently, batch proofs consist only of
		// inclusion proofs, so for non-inclusion proofs we expand the trie with nil values
		// and use inclusion proofs instead. If pruning were enabled, it would break this
		// trick and return the exact (pruned) trie.
		applyPruning := false
		newTrie, _, err := trie.NewTrieWithUpdatedRegisters(stateTrie, notFoundPaths, notFoundPayloads, applyPruning)
		if err != nil {
			return nil, err
		}

		// rootHash shouldn't change
		if newTrie.RootHash() != r.RootHash {
			return nil, fmt.Errorf("root hash has changed during the operation %x, %x", newTrie.RootHash(), r.RootHash)
		}
		stateTrie = newTrie
	}

	bp := stateTrie.UnsafeProofs(r.Paths)
	return bp, nil
}

// HasTrie returns true if a trie with the given rootHash exists in the forest
func (f *Forest) HasTrie(rootHash ledger.RootHash) bool {
	_, found := f.tries.Get(rootHash)
	return found
}

// GetTrie returns the trie with the given rootHash.
// Warning: use this function for read-only operations only.
func (f *Forest) GetTrie(rootHash ledger.RootHash) (*trie.MTrie, error) {
	// if in memory
	if trie, found := f.tries.Get(rootHash); found {
		return trie, nil
	}
	return nil, fmt.Errorf("trie with the given rootHash %s not found", rootHash)
}

// GetTries returns the tries currently cached in the forest
func (f *Forest) GetTries() ([]*trie.MTrie, error) {
	return f.tries.Tries(), nil
}

// AddTries adds multiple tries to the forest
func (f *Forest) AddTries(newTries []*trie.MTrie) error {
	for _, t := range newTries {
		err := f.AddTrie(t)
		if err != nil {
			return fmt.Errorf("adding tries to forest failed: %w", err)
		}
	}
	return nil
}

// AddTrie adds a trie to the forest
func (f *Forest) AddTrie(newTrie *trie.MTrie) error {
	if newTrie == nil {
		return nil
	}

	// TODO: check thread safety
	rootHash := newTrie.RootHash()
	if _, found := f.tries.Get(rootHash); found {
		// no-op: the trie is already in the forest
		return nil
	}
	f.tries.Push(newTrie)
	f.metrics.ForestNumberOfTrees(uint64(f.tries.Count()))

	return nil
}

// GetEmptyRootHash returns the rootHash of the empty trie
func (f *Forest) GetEmptyRootHash() ledger.RootHash {
	return trie.EmptyTrieRootHash()
}

// MostRecentTouchedRootHash returns the rootHash of the most recently touched trie
func (f *Forest) MostRecentTouchedRootHash() (ledger.RootHash, error) {
	trie := f.tries.LastAddedTrie()
	if trie != nil {
		return trie.RootHash(), nil
	}
	return ledger.RootHash(hash.DummyHash), fmt.Errorf("no trie is stored in the forest")
}
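// Example (illustrative sketch): producing and checking a batch proof. The
// verification call assumes the VerifyTrieBatchProof helper in
// "github.com/onflow/flow-go/ledger/common/proof"; p1 and p2 are placeholder
// ledger.Path values.
//
//	bp, err := forest.Proofs(&ledger.TrieRead{RootHash: root, Paths: []ledger.Path{p1, p2}})
//	if err != nil {
//		return err
//	}
//	ok := proof.VerifyTrieBatchProof(bp, ledger.State(root))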
// PurgeCacheExcept removes all tries in memory except the one with the given root hash
func (f *Forest) PurgeCacheExcept(rootHash ledger.RootHash) error {
	trie, found := f.tries.Get(rootHash)
	if !found {
		return fmt.Errorf("trie with the given root hash not found")
	}
	f.tries.Purge()
	f.tries.Push(trie)
	return nil
}

// Size returns the number of active tries in this store
func (f *Forest) Size() int {
	return f.tries.Count()
}
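// Example (illustrative sketch): the onTreeEvicted callback passed to NewForest
// can observe capacity evictions, e.g. to log or checkpoint state before a trie
// is dropped. logger is a placeholder for any logging facility.
//
//	forest, err := mtrie.NewForest(capacity, collector, func(t *trie.MTrie) {
//		logger.Printf("evicting trie %x with %d registers", t.RootHash(), t.AllocatedRegCount())
//	})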