github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/ledger/complete/mtrie/forest.go (about)

     1  package mtrie
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/onflow/flow-go/ledger"
     7  	"github.com/onflow/flow-go/ledger/common/hash"
     8  	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
     9  	"github.com/onflow/flow-go/module"
    10  )
    11  
// Forest holds several in-memory tries. As Forest is a storage-abstraction layer,
// we assume that all registers are addressed via paths of pre-defined uniform length.
//
// Forest has a limit, the forestCapacity, on the number of tries it is able to store.
// If more tries are added than the capacity, the Least Recently Used trie is
// removed (evicted) from the Forest. THIS IS A ROUGH HEURISTIC as it might evict
// tries that are still needed. In fully matured Flow, we will have an
// explicit eviction policy.
//
// TODO: Storage Eviction Policy for Forest
// For the execution node: we only evict on sealing a result.
type Forest struct {
	// tries stores all MTries in the forest. It is NOT a CACHE in the conventional sense:
	// there is no mechanism to load a trie from disk in case of a cache miss. Missing a
	// needed trie in the forest might cause a fatal application logic error.
	tries          *TrieCache
	// forestCapacity is the maximum number of tries the forest retains before evicting.
	forestCapacity int
	// onTreeEvicted, if non-nil, is invoked by the TrieCache for each evicted trie.
	onTreeEvicted  func(tree *trie.MTrie)
	// metrics receives ledger instrumentation (read/write sizes, trie counts, etc.).
	metrics        module.LedgerMetrics
}
    32  
    33  // NewForest returns a new instance of memory forest.
    34  //
    35  // CAUTION on forestCapacity: the specified capacity MUST be SUFFICIENT to store all needed MTries in the forest.
    36  // If more tries are added than the capacity, the Least Recently Added trie is removed (evicted) from the Forest (FIFO queue).
    37  // Make sure you chose a sufficiently large forestCapacity, such that, when reaching the capacity, the
    38  // Least Recently Added trie will never be needed again.
    39  func NewForest(forestCapacity int, metrics module.LedgerMetrics, onTreeEvicted func(tree *trie.MTrie)) (*Forest, error) {
    40  	forest := &Forest{tries: NewTrieCache(uint(forestCapacity), onTreeEvicted),
    41  		forestCapacity: forestCapacity,
    42  		onTreeEvicted:  onTreeEvicted,
    43  		metrics:        metrics,
    44  	}
    45  
    46  	// add trie with no allocated registers
    47  	emptyTrie := trie.NewEmptyMTrie()
    48  	err := forest.AddTrie(emptyTrie)
    49  	if err != nil {
    50  		return nil, fmt.Errorf("adding empty trie to forest failed: %w", err)
    51  	}
    52  	return forest, nil
    53  }
    54  
    55  // ValueSizes returns value sizes for a slice of paths and error (if any)
    56  // TODO: can be optimized further if we don't care about changing the order of the input r.Paths
    57  func (f *Forest) ValueSizes(r *ledger.TrieRead) ([]int, error) {
    58  
    59  	if len(r.Paths) == 0 {
    60  		return []int{}, nil
    61  	}
    62  
    63  	// lookup the trie by rootHash
    64  	trie, err := f.GetTrie(r.RootHash)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	// deduplicate paths:
    70  	// Generally, we expect the VM to deduplicate reads and writes. Hence, the following is a pre-caution.
    71  	// TODO: We could take out the following de-duplication logic
    72  	//       Which increases the cost for duplicates but reduces ValueSizes complexity without duplicates.
    73  	deduplicatedPaths := make([]ledger.Path, 0, len(r.Paths))
    74  	pathOrgIndex := make(map[ledger.Path][]int)
    75  	for i, path := range r.Paths {
    76  		// only collect duplicated paths once
    77  		indices, ok := pathOrgIndex[path]
    78  		if !ok { // deduplication here is optional
    79  			deduplicatedPaths = append(deduplicatedPaths, path)
    80  		}
    81  		// append the index
    82  		pathOrgIndex[path] = append(indices, i)
    83  	}
    84  
    85  	sizes := trie.UnsafeValueSizes(deduplicatedPaths) // this sorts deduplicatedPaths IN-PLACE
    86  
    87  	// reconstruct value sizes in the same key order that called the method
    88  	orderedValueSizes := make([]int, len(r.Paths))
    89  	totalValueSize := 0
    90  	for i, p := range deduplicatedPaths {
    91  		size := sizes[i]
    92  		indices := pathOrgIndex[p]
    93  		for _, j := range indices {
    94  			orderedValueSizes[j] = size
    95  		}
    96  		totalValueSize += len(indices) * size
    97  	}
    98  	// TODO rename the metrics
    99  	f.metrics.ReadValuesSize(uint64(totalValueSize))
   100  
   101  	return orderedValueSizes, nil
   102  }
   103  
   104  // ReadSingleValue reads value for a single path and returns value and error (if any)
   105  func (f *Forest) ReadSingleValue(r *ledger.TrieReadSingleValue) (ledger.Value, error) {
   106  	// lookup the trie by rootHash
   107  	trie, err := f.GetTrie(r.RootHash)
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  
   112  	payload := trie.ReadSinglePayload(r.Path)
   113  	return payload.Value().DeepCopy(), nil
   114  }
   115  
   116  // Read reads values for an slice of paths and returns values and error (if any)
   117  // TODO: can be optimized further if we don't care about changing the order of the input r.Paths
   118  func (f *Forest) Read(r *ledger.TrieRead) ([]ledger.Value, error) {
   119  
   120  	if len(r.Paths) == 0 {
   121  		return []ledger.Value{}, nil
   122  	}
   123  
   124  	// lookup the trie by rootHash
   125  	trie, err := f.GetTrie(r.RootHash)
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	// call ReadSinglePayload if there is only one path
   131  	if len(r.Paths) == 1 {
   132  		payload := trie.ReadSinglePayload(r.Paths[0])
   133  		return []ledger.Value{payload.Value().DeepCopy()}, nil
   134  	}
   135  
   136  	// deduplicate keys:
   137  	// Generally, we expect the VM to deduplicate reads and writes. Hence, the following is a pre-caution.
   138  	// TODO: We could take out the following de-duplication logic
   139  	//       Which increases the cost for duplicates but reduces read complexity without duplicates.
   140  	deduplicatedPaths := make([]ledger.Path, 0, len(r.Paths))
   141  	pathOrgIndex := make(map[ledger.Path][]int)
   142  	for i, path := range r.Paths {
   143  		// only collect duplicated keys once
   144  		indices, ok := pathOrgIndex[path]
   145  		if !ok { // deduplication here is optional
   146  			deduplicatedPaths = append(deduplicatedPaths, path)
   147  		}
   148  		// append the index
   149  		pathOrgIndex[path] = append(indices, i)
   150  	}
   151  
   152  	payloads := trie.UnsafeRead(deduplicatedPaths) // this sorts deduplicatedPaths IN-PLACE
   153  
   154  	// reconstruct the payloads in the same key order that called the method
   155  	orderedValues := make([]ledger.Value, len(r.Paths))
   156  	totalPayloadSize := 0
   157  	for i, p := range deduplicatedPaths {
   158  		payload := payloads[i]
   159  		indices := pathOrgIndex[p]
   160  		for _, j := range indices {
   161  			orderedValues[j] = payload.Value().DeepCopy()
   162  		}
   163  		totalPayloadSize += len(indices) * payload.Size()
   164  	}
   165  	// TODO rename the metrics
   166  	f.metrics.ReadValuesSize(uint64(totalPayloadSize))
   167  
   168  	return orderedValues, nil
   169  }
   170  
   171  // Update creates a new trie by updating Values for registers in the parent trie,
   172  // adds new trie to forest, and returns rootHash and error (if any).
   173  // In case there are multiple updates to the same register, Update will persist
   174  // the latest written value.
   175  // Note: Update adds new trie to forest, unlike NewTrie().
   176  func (f *Forest) Update(u *ledger.TrieUpdate) (ledger.RootHash, error) {
   177  	t, err := f.NewTrie(u)
   178  	if err != nil {
   179  		return ledger.RootHash(hash.DummyHash), err
   180  	}
   181  
   182  	err = f.AddTrie(t)
   183  	if err != nil {
   184  		return ledger.RootHash(hash.DummyHash), fmt.Errorf("adding updated trie to forest failed: %w", err)
   185  	}
   186  
   187  	return t.RootHash(), nil
   188  }
   189  
   190  // NewTrie creates a new trie by updating Values for registers in the parent trie,
   191  // and returns new trie and error (if any).
   192  // In case there are multiple updates to the same register, NewTrie will persist
   193  // the latest written value.
   194  // Note: NewTrie doesn't add new trie to forest, unlike Update().
   195  func (f *Forest) NewTrie(u *ledger.TrieUpdate) (*trie.MTrie, error) {
   196  
   197  	parentTrie, err := f.GetTrie(u.RootHash)
   198  	if err != nil {
   199  		return nil, err
   200  	}
   201  
   202  	if len(u.Paths) == 0 { // no key no change
   203  		return parentTrie, nil
   204  	}
   205  
   206  	// Deduplicate writes to the same register: we only retain the value of the last write
   207  	// Generally, we expect the VM to deduplicate reads and writes.
   208  	deduplicatedPaths := make([]ledger.Path, 0, len(u.Paths))
   209  	deduplicatedPayloads := make([]ledger.Payload, 0, len(u.Paths))
   210  	payloadMap := make(map[ledger.Path]int) // index into deduplicatedPaths, deduplicatedPayloads with register update
   211  	totalPayloadSize := 0
   212  	for i, path := range u.Paths {
   213  		payload := u.Payloads[i]
   214  		// check if we already have encountered an update for the respective register
   215  		if idx, ok := payloadMap[path]; ok {
   216  			oldPayload := deduplicatedPayloads[idx]
   217  			deduplicatedPayloads[idx] = *payload
   218  			totalPayloadSize += -oldPayload.Size() + payload.Size()
   219  		} else {
   220  			payloadMap[path] = len(deduplicatedPaths)
   221  			deduplicatedPaths = append(deduplicatedPaths, path)
   222  			deduplicatedPayloads = append(deduplicatedPayloads, *u.Payloads[i])
   223  			totalPayloadSize += payload.Size()
   224  		}
   225  	}
   226  
   227  	// Update metrics with number of updated payloads and size of updated payloads.
   228  	// TODO rename metrics names
   229  	f.metrics.UpdateValuesNumber(uint64(len(deduplicatedPayloads)))
   230  	f.metrics.UpdateValuesSize(uint64(totalPayloadSize))
   231  
   232  	// apply pruning on update
   233  	applyPruning := true
   234  	newTrie, maxDepthTouched, err := trie.NewTrieWithUpdatedRegisters(parentTrie, deduplicatedPaths, deduplicatedPayloads, applyPruning)
   235  	if err != nil {
   236  		return nil, fmt.Errorf("constructing updated trie failed: %w", err)
   237  	}
   238  
   239  	f.metrics.LatestTrieRegCount(newTrie.AllocatedRegCount())
   240  	f.metrics.LatestTrieRegCountDiff(int64(newTrie.AllocatedRegCount() - parentTrie.AllocatedRegCount()))
   241  	f.metrics.LatestTrieRegSize(newTrie.AllocatedRegSize())
   242  	f.metrics.LatestTrieRegSizeDiff(int64(newTrie.AllocatedRegSize() - parentTrie.AllocatedRegSize()))
   243  	f.metrics.LatestTrieMaxDepthTouched(maxDepthTouched)
   244  
   245  	return newTrie, nil
   246  }
   247  
   248  // Proofs returns a batch proof for the given paths.
   249  //
   250  // Proves are generally _not_ provided in the register order of the query.
   251  // In the current implementation, input paths in the TrieRead `r` are sorted in an ascendent order,
   252  // The output proofs are provided following the order of the sorted paths.
   253  func (f *Forest) Proofs(r *ledger.TrieRead) (*ledger.TrieBatchProof, error) {
   254  
   255  	// no path, empty batchproof
   256  	if len(r.Paths) == 0 {
   257  		return ledger.NewTrieBatchProof(), nil
   258  	}
   259  
   260  	// look up for non existing paths
   261  	retValueSizes, err := f.ValueSizes(r)
   262  	if err != nil {
   263  		return nil, err
   264  	}
   265  
   266  	notFoundPaths := make([]ledger.Path, 0)
   267  	notFoundPayloads := make([]ledger.Payload, 0)
   268  	for i, path := range r.Paths {
   269  		// add if empty
   270  		if retValueSizes[i] == 0 {
   271  			notFoundPaths = append(notFoundPaths, path)
   272  			notFoundPayloads = append(notFoundPayloads, *ledger.EmptyPayload())
   273  		}
   274  	}
   275  
   276  	stateTrie, err := f.GetTrie(r.RootHash)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	// if we have to insert empty values
   282  	if len(notFoundPaths) > 0 {
   283  		// for proofs, we have to set the pruning to false,
   284  		// currently batch proofs are only consists of inclusion proofs
   285  		// so for non-inclusion proofs we expand the trie with nil value and use an inclusion proof
   286  		// instead. if pruning is enabled it would break this trick and return the exact trie.
   287  		applyPruning := false
   288  		newTrie, _, err := trie.NewTrieWithUpdatedRegisters(stateTrie, notFoundPaths, notFoundPayloads, applyPruning)
   289  		if err != nil {
   290  			return nil, err
   291  		}
   292  
   293  		// rootHash shouldn't change
   294  		if newTrie.RootHash() != r.RootHash {
   295  			return nil, fmt.Errorf("root hash has changed during the operation %x, %x", newTrie.RootHash(), r.RootHash)
   296  		}
   297  		stateTrie = newTrie
   298  	}
   299  
   300  	bp := stateTrie.UnsafeProofs(r.Paths)
   301  	return bp, nil
   302  }
   303  
   304  // HasTrie returns true if trie exist at specific rootHash
   305  func (f *Forest) HasTrie(rootHash ledger.RootHash) bool {
   306  	_, found := f.tries.Get(rootHash)
   307  	return found
   308  }
   309  
   310  // GetTrie returns trie at specific rootHash
   311  // warning, use this function for read-only operation
   312  func (f *Forest) GetTrie(rootHash ledger.RootHash) (*trie.MTrie, error) {
   313  	// if in memory
   314  	if trie, found := f.tries.Get(rootHash); found {
   315  		return trie, nil
   316  	}
   317  	return nil, fmt.Errorf("trie with the given rootHash %s not found", rootHash)
   318  }
   319  
   320  // GetTries returns list of currently cached tree root hashes
   321  func (f *Forest) GetTries() ([]*trie.MTrie, error) {
   322  	return f.tries.Tries(), nil
   323  }
   324  
   325  // AddTries adds a trie to the forest
   326  func (f *Forest) AddTries(newTries []*trie.MTrie) error {
   327  	for _, t := range newTries {
   328  		err := f.AddTrie(t)
   329  		if err != nil {
   330  			return fmt.Errorf("adding tries to forest failed: %w", err)
   331  		}
   332  	}
   333  	return nil
   334  }
   335  
   336  // AddTrie adds a trie to the forest
   337  func (f *Forest) AddTrie(newTrie *trie.MTrie) error {
   338  	if newTrie == nil {
   339  		return nil
   340  	}
   341  
   342  	// TODO: check Thread safety
   343  	rootHash := newTrie.RootHash()
   344  	if _, found := f.tries.Get(rootHash); found {
   345  		// do no op
   346  		return nil
   347  	}
   348  	f.tries.Push(newTrie)
   349  	f.metrics.ForestNumberOfTrees(uint64(f.tries.Count()))
   350  
   351  	return nil
   352  }
   353  
   354  // GetEmptyRootHash returns the rootHash of empty Trie
   355  func (f *Forest) GetEmptyRootHash() ledger.RootHash {
   356  	return trie.EmptyTrieRootHash()
   357  }
   358  
   359  // MostRecentTouchedRootHash returns the rootHash of the most recently touched trie
   360  func (f *Forest) MostRecentTouchedRootHash() (ledger.RootHash, error) {
   361  	trie := f.tries.LastAddedTrie()
   362  	if trie != nil {
   363  		return trie.RootHash(), nil
   364  	}
   365  	return ledger.RootHash(hash.DummyHash), fmt.Errorf("no trie is stored in the forest")
   366  }
   367  
   368  // PurgeCacheExcept removes all tries in the memory except the one with the given root hash
   369  func (f *Forest) PurgeCacheExcept(rootHash ledger.RootHash) error {
   370  	trie, found := f.tries.Get(rootHash)
   371  	if !found {
   372  		return fmt.Errorf("trie with the given root hash not found")
   373  	}
   374  	f.tries.Purge()
   375  	f.tries.Push(trie)
   376  	return nil
   377  }
   378  
   379  // Size returns the number of active tries in this store
   380  func (f *Forest) Size() int {
   381  	return f.tries.Count()
   382  }