github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/operation/common.go (about)

     1  package operation
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  
     8  	"github.com/dgraph-io/badger/v2"
     9  	"github.com/vmihailenco/msgpack/v4"
    10  
    11  	"github.com/onflow/flow-go/model/flow"
    12  	"github.com/onflow/flow-go/module/irrecoverable"
    13  	"github.com/onflow/flow-go/storage"
    14  )
    15  
    16  // batchWrite will encode the given entity using msgpack and will upsert the resulting
    17  // binary data in the badger wrote batch under the provided key - if the value already exists
    18  // in the database it will be overridden.
    19  // No errors are expected during normal operation.
    20  func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error {
    21  	return func(writeBatch *badger.WriteBatch) error {
    22  
    23  		// update the maximum key size if the inserted key is bigger
    24  		if uint32(len(key)) > max {
    25  			max = uint32(len(key))
    26  			err := SetMax(writeBatch)
    27  			if err != nil {
    28  				return fmt.Errorf("could not update max tracker: %w", err)
    29  			}
    30  		}
    31  
    32  		// serialize the entity data
    33  		val, err := msgpack.Marshal(entity)
    34  		if err != nil {
    35  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
    36  		}
    37  
    38  		// persist the entity data into the DB
    39  		err = writeBatch.Set(key, val)
    40  		if err != nil {
    41  			return irrecoverable.NewExceptionf("could not store data: %w", err)
    42  		}
    43  		return nil
    44  	}
    45  }
    46  
    47  // insert will encode the given entity using msgpack and will insert the resulting
    48  // binary data in the badger DB under the provided key. It will error if the
    49  // key already exists.
    50  // Error returns:
    51  //   - storage.ErrAlreadyExists if the key already exists in the database.
    52  //   - generic error in case of unexpected failure from the database layer or
    53  //     encoding failure.
    54  func insert(key []byte, entity interface{}) func(*badger.Txn) error {
    55  	return func(tx *badger.Txn) error {
    56  
    57  		// update the maximum key size if the inserted key is bigger
    58  		if uint32(len(key)) > max {
    59  			max = uint32(len(key))
    60  			err := SetMax(tx)
    61  			if err != nil {
    62  				return fmt.Errorf("could not update max tracker: %w", err)
    63  			}
    64  		}
    65  
    66  		// check if the key already exists in the db
    67  		_, err := tx.Get(key)
    68  		if err == nil {
    69  			return storage.ErrAlreadyExists
    70  		}
    71  
    72  		if !errors.Is(err, badger.ErrKeyNotFound) {
    73  			return irrecoverable.NewExceptionf("could not retrieve key: %w", err)
    74  		}
    75  
    76  		// serialize the entity data
    77  		val, err := msgpack.Marshal(entity)
    78  		if err != nil {
    79  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
    80  		}
    81  
    82  		// persist the entity data into the DB
    83  		err = tx.Set(key, val)
    84  		if err != nil {
    85  			return irrecoverable.NewExceptionf("could not store data: %w", err)
    86  		}
    87  		return nil
    88  	}
    89  }
    90  
    91  // update will encode the given entity with MsgPack and update the binary data
    92  // under the given key in the badger DB. The key must already exist.
    93  // Error returns:
    94  //   - storage.ErrNotFound if the key does not already exist in the database.
    95  //   - generic error in case of unexpected failure from the database layer or
    96  //     encoding failure.
    97  func update(key []byte, entity interface{}) func(*badger.Txn) error {
    98  	return func(tx *badger.Txn) error {
    99  
   100  		// retrieve the item from the key-value store
   101  		_, err := tx.Get(key)
   102  		if errors.Is(err, badger.ErrKeyNotFound) {
   103  			return storage.ErrNotFound
   104  		}
   105  		if err != nil {
   106  			return irrecoverable.NewExceptionf("could not check key: %w", err)
   107  		}
   108  
   109  		// serialize the entity data
   110  		val, err := msgpack.Marshal(entity)
   111  		if err != nil {
   112  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
   113  		}
   114  
   115  		// persist the entity data into the DB
   116  		err = tx.Set(key, val)
   117  		if err != nil {
   118  			return irrecoverable.NewExceptionf("could not replace data: %w", err)
   119  		}
   120  
   121  		return nil
   122  	}
   123  }
   124  
   125  // upsert will encode the given entity with MsgPack and upsert the binary data
   126  // under the given key in the badger DB.
   127  func upsert(key []byte, entity interface{}) func(*badger.Txn) error {
   128  	return func(tx *badger.Txn) error {
   129  		// update the maximum key size if the inserted key is bigger
   130  		if uint32(len(key)) > max {
   131  			max = uint32(len(key))
   132  			err := SetMax(tx)
   133  			if err != nil {
   134  				return fmt.Errorf("could not update max tracker: %w", err)
   135  			}
   136  		}
   137  
   138  		// serialize the entity data
   139  		val, err := msgpack.Marshal(entity)
   140  		if err != nil {
   141  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
   142  		}
   143  
   144  		// persist the entity data into the DB
   145  		err = tx.Set(key, val)
   146  		if err != nil {
   147  			return irrecoverable.NewExceptionf("could not upsert data: %w", err)
   148  		}
   149  
   150  		return nil
   151  	}
   152  }
   153  
   154  // remove removes the entity with the given key, if it exists. If it doesn't
   155  // exist, this is a no-op.
   156  // Error returns:
   157  // * storage.ErrNotFound if the key to delete does not exist.
   158  // * generic error in case of unexpected database error
   159  func remove(key []byte) func(*badger.Txn) error {
   160  	return func(tx *badger.Txn) error {
   161  		// retrieve the item from the key-value store
   162  		_, err := tx.Get(key)
   163  		if err != nil {
   164  			if errors.Is(err, badger.ErrKeyNotFound) {
   165  				return storage.ErrNotFound
   166  			}
   167  			return irrecoverable.NewExceptionf("could not check key: %w", err)
   168  		}
   169  
   170  		err = tx.Delete(key)
   171  		if err != nil {
   172  			return irrecoverable.NewExceptionf("could not delete item: %w", err)
   173  		}
   174  		return nil
   175  	}
   176  }
   177  
   178  // batchRemove removes entry under a given key in a write-batch.
   179  // if key doesn't exist, does nothing.
   180  // No errors are expected during normal operation.
   181  func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error {
   182  	return func(writeBatch *badger.WriteBatch) error {
   183  		err := writeBatch.Delete(key)
   184  		if err != nil {
   185  			return irrecoverable.NewExceptionf("could not batch delete data: %w", err)
   186  		}
   187  		return nil
   188  	}
   189  }
   190  
   191  // removeByPrefix removes all the entities if the prefix of the key matches the given prefix.
   192  // if no key matches, this is a no-op
   193  // No errors are expected during normal operation.
   194  func removeByPrefix(prefix []byte) func(*badger.Txn) error {
   195  	return func(tx *badger.Txn) error {
   196  		opts := badger.DefaultIteratorOptions
   197  		opts.AllVersions = false
   198  		opts.PrefetchValues = false
   199  		it := tx.NewIterator(opts)
   200  		defer it.Close()
   201  
   202  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   203  			key := it.Item().KeyCopy(nil)
   204  			err := tx.Delete(key)
   205  			if err != nil {
   206  				return irrecoverable.NewExceptionf("could not delete item with prefix: %w", err)
   207  			}
   208  		}
   209  
   210  		return nil
   211  	}
   212  }
   213  
   214  // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction.
   215  // no error would be returned if no key was found with the given prefix.
   216  // all error returned should be exception
   217  func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   218  	return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   219  
   220  		opts := badger.DefaultIteratorOptions
   221  		opts.AllVersions = false
   222  		opts.PrefetchValues = false
   223  		it := tx.NewIterator(opts)
   224  		defer it.Close()
   225  
   226  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   227  			key := it.Item().KeyCopy(nil)
   228  			err := writeBatch.Delete(key)
   229  			if err != nil {
   230  				return irrecoverable.NewExceptionf("could not delete item in batch: %w", err)
   231  			}
   232  		}
   233  		return nil
   234  	}
   235  }
   236  
   237  // retrieve will retrieve the binary data under the given key from the badger DB
   238  // and decode it into the given entity. The provided entity needs to be a
   239  // pointer to an initialized entity of the correct type.
   240  // Error returns:
   241  //   - storage.ErrNotFound if the key does not exist in the database
   242  //   - generic error in case of unexpected failure from the database layer, or failure
   243  //     to decode an existing database value
   244  func retrieve(key []byte, entity interface{}) func(*badger.Txn) error {
   245  	return func(tx *badger.Txn) error {
   246  
   247  		// retrieve the item from the key-value store
   248  		item, err := tx.Get(key)
   249  		if errors.Is(err, badger.ErrKeyNotFound) {
   250  			return storage.ErrNotFound
   251  		}
   252  		if err != nil {
   253  			return irrecoverable.NewExceptionf("could not load data: %w", err)
   254  		}
   255  
   256  		// get the value from the item
   257  		err = item.Value(func(val []byte) error {
   258  			err := msgpack.Unmarshal(val, entity)
   259  			return err
   260  		})
   261  		if err != nil {
   262  			return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   263  		}
   264  
   265  		return nil
   266  	}
   267  }
   268  
   269  // exists returns true if a key exists in the database.
   270  // No errors are expected during normal operation.
   271  func exists(key []byte, keyExists *bool) func(*badger.Txn) error {
   272  	return func(tx *badger.Txn) error {
   273  		_, err := tx.Get(key)
   274  		if err != nil {
   275  			// the key does not exist in the database
   276  			if errors.Is(err, badger.ErrKeyNotFound) {
   277  				*keyExists = false
   278  				return nil
   279  			}
   280  			// exception while checking for the key
   281  			return irrecoverable.NewExceptionf("could not load data: %w", err)
   282  		}
   283  
   284  		// the key does exist in the database
   285  		*keyExists = true
   286  		return nil
   287  	}
   288  }
   289  
// checkFunc is called during key iteration through the badger DB in order to
// check whether we should process the given key-value pair. It can be used to
// avoid loading the value if it's not of interest, as well as storing the key
// for the current iteration step.
// Returning false makes iterate and traverse skip the item without reading
// its value.
type checkFunc func(key []byte) bool
   295  
// createFunc returns a pointer to an initialized entity that we can potentially
// decode the next value into during a badger DB iteration. The returned value
// is used as the msgpack decoding target by iterate and traverse.
type createFunc func() interface{}
   299  
// handleFunc is a function that starts the processing of the current key-value
// pair during a badger iteration. It should be called after the key was checked
// and the entity was decoded.
// No errors are expected during normal operation. Any error halts the
// iteration and is propagated (wrapped) to the caller of the iteration.
type handleFunc func() error
   305  
// iterationFunc is a function provided to our low-level iteration functions that
// allows us to pass badger efficiencies across badger boundaries. It is called
// once for each iteration step and returns a function to check the key, a
// function to create the decode target and a function to process the current
// key-value pair. This allows a consumer of the API to decide when to skip the
// loading of values, the initialization of entities and the processing.
type iterationFunc func() (checkFunc, createFunc, handleFunc)
   313  
   314  // lookup is the default iteration function allowing us to collect a list of
   315  // entity IDs from an index.
   316  func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) {
   317  	*entityIDs = make([]flow.Identifier, 0, len(*entityIDs))
   318  	return func() (checkFunc, createFunc, handleFunc) {
   319  		check := func(key []byte) bool {
   320  			return true
   321  		}
   322  		var entityID flow.Identifier
   323  		create := func() interface{} {
   324  			return &entityID
   325  		}
   326  		handle := func() error {
   327  			*entityIDs = append(*entityIDs, entityID)
   328  			return nil
   329  		}
   330  		return check, create, handle
   331  	}
   332  }
   333  
// withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load
// the values when iterating over keys (i.e. key-only iteration). Key-only iteration
// is several orders of magnitude faster than regular iteration, because it involves
// access to the LSM-tree only, which is usually resident entirely in RAM.
//
// Its signature matches the option functions accepted by iterate.
func withPrefetchValuesFalse(options *badger.IteratorOptions) {
	options.PrefetchValues = false
}
   341  
   342  // iterate iterates over a range of keys defined by a start and end key. The
   343  // start key may be higher than the end key, in which case we iterate in
   344  // reverse order.
   345  //
   346  // The iteration range uses prefix-wise semantics. Specifically, all keys that
   347  // meet ANY of the following conditions are included in the iteration:
   348  //   - have a prefix equal to the start key OR
   349  //   - have a prefix equal to the end key OR
   350  //   - have a prefix that is lexicographically between start and end
   351  //
   352  // On each iteration, it will call the iteration function to initialize
   353  // functions specific to processing the given key-value pair.
   354  //
   355  // TODO: this function is unbounded – pass context.Context to this or calling functions to allow timing functions out.
   356  // No errors are expected during normal operation. Any errors returned by the
   357  // provided handleFunc will be propagated back to the caller of iterate.
   358  func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error {
   359  	return func(tx *badger.Txn) error {
   360  
   361  		// initialize the default options and comparison modifier for iteration
   362  		modifier := 1
   363  		options := badger.DefaultIteratorOptions
   364  		for _, apply := range opts {
   365  			apply(&options)
   366  		}
   367  
   368  		// In order to satisfy this function's prefix-wise inclusion semantics,
   369  		// we append 0xff bytes to the largest of start and end.
   370  		// This ensures Badger will seek to the largest key with that prefix
   371  		// for reverse iteration, thus including all keys with a prefix matching
   372  		// the starting key. It also enables us to detect boundary conditions by
   373  		// simple lexicographic comparison (ie. bytes.Compare) rather than
   374  		// explicitly comparing prefixes.
   375  		//
   376  		// See https://github.com/onflow/flow-go/pull/3310#issuecomment-618127494
   377  		// for discussion and more detail on this.
   378  
   379  		// If start is bigger than end, we have a backwards iteration:
   380  		// 1) We set the reverse option on the iterator, so we step through all
   381  		//    the keys backwards. This modifies the behaviour of Seek to go to
   382  		//    the first key that is less than or equal to the start key (as
   383  		//    opposed to greater than or equal in a regular iteration).
   384  		// 2) In order to satisfy this function's prefix-wise inclusion semantics,
   385  		//    we append a 0xff-byte suffix to the start key so the seek will go
   386  		// to the right place.
   387  		// 3) For a regular iteration, we break the loop upon hitting the first
   388  		//    item that has a key higher than the end prefix. In order to reverse
   389  		//    this, we use a modifier for the comparison that reverses the check
   390  		//    and makes it stop upon the first item lower than the end prefix.
   391  		if bytes.Compare(start, end) > 0 {
   392  			options.Reverse = true // make sure to go in reverse order
   393  			modifier = -1          // make sure to stop after end prefix
   394  			length := uint32(len(start))
   395  			diff := max - length
   396  			for i := uint32(0); i < diff; i++ {
   397  				start = append(start, 0xff)
   398  			}
   399  		} else {
   400  			// for forward iteration, add the 0xff-bytes suffix to the end
   401  			// prefix, to ensure we include all keys with that prefix before
   402  			// finishing.
   403  			length := uint32(len(end))
   404  			diff := max - length
   405  			for i := uint32(0); i < diff; i++ {
   406  				end = append(end, 0xff)
   407  			}
   408  		}
   409  
   410  		it := tx.NewIterator(options)
   411  		defer it.Close()
   412  
   413  		for it.Seek(start); it.Valid(); it.Next() {
   414  
   415  			item := it.Item()
   416  
   417  			key := item.Key()
   418  			// for forward iteration, check whether key > end, for backward
   419  			// iteration check whether key < end
   420  			if bytes.Compare(key, end)*modifier > 0 {
   421  				break
   422  			}
   423  
   424  			// initialize processing functions for iteration
   425  			check, create, handle := iteration()
   426  
   427  			// check if we should process the item at all
   428  			ok := check(key)
   429  			if !ok {
   430  				continue
   431  			}
   432  
   433  			// process the actual item
   434  			err := item.Value(func(val []byte) error {
   435  
   436  				// decode into the entity
   437  				entity := create()
   438  				err := msgpack.Unmarshal(val, entity)
   439  				if err != nil {
   440  					return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   441  				}
   442  
   443  				// process the entity
   444  				err = handle()
   445  				if err != nil {
   446  					return fmt.Errorf("could not handle entity: %w", err)
   447  				}
   448  
   449  				return nil
   450  			})
   451  			if err != nil {
   452  				return fmt.Errorf("could not process value: %w", err)
   453  			}
   454  		}
   455  
   456  		return nil
   457  	}
   458  }
   459  
   460  // traverse iterates over a range of keys defined by a prefix.
   461  //
   462  // The prefix must be shared by all keys in the iteration.
   463  //
   464  // On each iteration, it will call the iteration function to initialize
   465  // functions specific to processing the given key-value pair.
   466  func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error {
   467  	return func(tx *badger.Txn) error {
   468  		if len(prefix) == 0 {
   469  			return fmt.Errorf("prefix must not be empty")
   470  		}
   471  
   472  		opts := badger.DefaultIteratorOptions
   473  		// NOTE: this is an optimization only, it does not enforce that all
   474  		// results in the iteration have this prefix.
   475  		opts.Prefix = prefix
   476  
   477  		it := tx.NewIterator(opts)
   478  		defer it.Close()
   479  
   480  		// this is where we actually enforce that all results have the prefix
   481  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   482  
   483  			item := it.Item()
   484  
   485  			// initialize processing functions for iteration
   486  			check, create, handle := iteration()
   487  
   488  			// check if we should process the item at all
   489  			key := item.Key()
   490  			ok := check(key)
   491  			if !ok {
   492  				continue
   493  			}
   494  
   495  			// process the actual item
   496  			err := item.Value(func(val []byte) error {
   497  
   498  				// decode into the entity
   499  				entity := create()
   500  				err := msgpack.Unmarshal(val, entity)
   501  				if err != nil {
   502  					return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   503  				}
   504  
   505  				// process the entity
   506  				err = handle()
   507  				if err != nil {
   508  					return fmt.Errorf("could not handle entity: %w", err)
   509  				}
   510  
   511  				return nil
   512  			})
   513  			if err != nil {
   514  				return fmt.Errorf("could not process value: %w", err)
   515  			}
   516  		}
   517  
   518  		return nil
   519  	}
   520  }
   521  
   522  // findHighestAtOrBelow searches for the highest key with the given prefix and a height
   523  // at or below the target height, and retrieves and decodes the value associated with the
   524  // key into the given entity.
   525  // If no key is found, the function returns storage.ErrNotFound.
   526  func findHighestAtOrBelow(
   527  	prefix []byte,
   528  	height uint64,
   529  	entity interface{},
   530  ) func(*badger.Txn) error {
   531  	return func(tx *badger.Txn) error {
   532  		if len(prefix) == 0 {
   533  			return fmt.Errorf("prefix must not be empty")
   534  		}
   535  
   536  		opts := badger.DefaultIteratorOptions
   537  		opts.Prefix = prefix
   538  		opts.Reverse = true
   539  
   540  		it := tx.NewIterator(opts)
   541  		defer it.Close()
   542  
   543  		it.Seek(append(prefix, b(height)...))
   544  
   545  		if !it.Valid() {
   546  			return storage.ErrNotFound
   547  		}
   548  
   549  		return it.Item().Value(func(val []byte) error {
   550  			err := msgpack.Unmarshal(val, entity)
   551  			if err != nil {
   552  				return fmt.Errorf("could not decode entity: %w", err)
   553  			}
   554  			return nil
   555  		})
   556  	}
   557  }
   558  
   559  // Fail returns a DB operation function that always fails with the given error.
   560  func Fail(err error) func(*badger.Txn) error {
   561  	return func(_ *badger.Txn) error {
   562  		return err
   563  	}
   564  }