github.com/onflow/flow-go@v0.33.17/storage/badger/operation/common.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package operation
     4  
     5  import (
     6  	"bytes"
     7  	"errors"
     8  	"fmt"
     9  
    10  	"github.com/dgraph-io/badger/v2"
    11  	"github.com/vmihailenco/msgpack/v4"
    12  
    13  	"github.com/onflow/flow-go/model/flow"
    14  	"github.com/onflow/flow-go/module/irrecoverable"
    15  	"github.com/onflow/flow-go/storage"
    16  )
    17  
    18  // batchWrite will encode the given entity using msgpack and will upsert the resulting
    19  // binary data in the badger wrote batch under the provided key - if the value already exists
    20  // in the database it will be overridden.
    21  // No errors are expected during normal operation.
    22  func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error {
    23  	return func(writeBatch *badger.WriteBatch) error {
    24  
    25  		// update the maximum key size if the inserted key is bigger
    26  		if uint32(len(key)) > max {
    27  			max = uint32(len(key))
    28  			err := SetMax(writeBatch)
    29  			if err != nil {
    30  				return fmt.Errorf("could not update max tracker: %w", err)
    31  			}
    32  		}
    33  
    34  		// serialize the entity data
    35  		val, err := msgpack.Marshal(entity)
    36  		if err != nil {
    37  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
    38  		}
    39  
    40  		// persist the entity data into the DB
    41  		err = writeBatch.Set(key, val)
    42  		if err != nil {
    43  			return irrecoverable.NewExceptionf("could not store data: %w", err)
    44  		}
    45  		return nil
    46  	}
    47  }
    48  
    49  // insert will encode the given entity using msgpack and will insert the resulting
    50  // binary data in the badger DB under the provided key. It will error if the
    51  // key already exists.
    52  // Error returns:
    53  //   - storage.ErrAlreadyExists if the key already exists in the database.
    54  //   - generic error in case of unexpected failure from the database layer or
    55  //     encoding failure.
    56  func insert(key []byte, entity interface{}) func(*badger.Txn) error {
    57  	return func(tx *badger.Txn) error {
    58  
    59  		// update the maximum key size if the inserted key is bigger
    60  		if uint32(len(key)) > max {
    61  			max = uint32(len(key))
    62  			err := SetMax(tx)
    63  			if err != nil {
    64  				return fmt.Errorf("could not update max tracker: %w", err)
    65  			}
    66  		}
    67  
    68  		// check if the key already exists in the db
    69  		_, err := tx.Get(key)
    70  		if err == nil {
    71  			return storage.ErrAlreadyExists
    72  		}
    73  
    74  		if !errors.Is(err, badger.ErrKeyNotFound) {
    75  			return irrecoverable.NewExceptionf("could not retrieve key: %w", err)
    76  		}
    77  
    78  		// serialize the entity data
    79  		val, err := msgpack.Marshal(entity)
    80  		if err != nil {
    81  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
    82  		}
    83  
    84  		// persist the entity data into the DB
    85  		err = tx.Set(key, val)
    86  		if err != nil {
    87  			return irrecoverable.NewExceptionf("could not store data: %w", err)
    88  		}
    89  		return nil
    90  	}
    91  }
    92  
    93  // update will encode the given entity with MsgPack and update the binary data
    94  // under the given key in the badger DB. The key must already exist.
    95  // Error returns:
    96  //   - storage.ErrNotFound if the key does not already exist in the database.
    97  //   - generic error in case of unexpected failure from the database layer or
    98  //     encoding failure.
    99  func update(key []byte, entity interface{}) func(*badger.Txn) error {
   100  	return func(tx *badger.Txn) error {
   101  
   102  		// retrieve the item from the key-value store
   103  		_, err := tx.Get(key)
   104  		if errors.Is(err, badger.ErrKeyNotFound) {
   105  			return storage.ErrNotFound
   106  		}
   107  		if err != nil {
   108  			return irrecoverable.NewExceptionf("could not check key: %w", err)
   109  		}
   110  
   111  		// serialize the entity data
   112  		val, err := msgpack.Marshal(entity)
   113  		if err != nil {
   114  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
   115  		}
   116  
   117  		// persist the entity data into the DB
   118  		err = tx.Set(key, val)
   119  		if err != nil {
   120  			return irrecoverable.NewExceptionf("could not replace data: %w", err)
   121  		}
   122  
   123  		return nil
   124  	}
   125  }
   126  
   127  // upsert will encode the given entity with MsgPack and upsert the binary data
   128  // under the given key in the badger DB.
   129  func upsert(key []byte, entity interface{}) func(*badger.Txn) error {
   130  	return func(tx *badger.Txn) error {
   131  		// update the maximum key size if the inserted key is bigger
   132  		if uint32(len(key)) > max {
   133  			max = uint32(len(key))
   134  			err := SetMax(tx)
   135  			if err != nil {
   136  				return fmt.Errorf("could not update max tracker: %w", err)
   137  			}
   138  		}
   139  
   140  		// serialize the entity data
   141  		val, err := msgpack.Marshal(entity)
   142  		if err != nil {
   143  			return irrecoverable.NewExceptionf("could not encode entity: %w", err)
   144  		}
   145  
   146  		// persist the entity data into the DB
   147  		err = tx.Set(key, val)
   148  		if err != nil {
   149  			return irrecoverable.NewExceptionf("could not upsert data: %w", err)
   150  		}
   151  
   152  		return nil
   153  	}
   154  }
   155  
   156  // remove removes the entity with the given key, if it exists. If it doesn't
   157  // exist, this is a no-op.
   158  // Error returns:
   159  // * storage.ErrNotFound if the key to delete does not exist.
   160  // * generic error in case of unexpected database error
   161  func remove(key []byte) func(*badger.Txn) error {
   162  	return func(tx *badger.Txn) error {
   163  		// retrieve the item from the key-value store
   164  		_, err := tx.Get(key)
   165  		if err != nil {
   166  			if errors.Is(err, badger.ErrKeyNotFound) {
   167  				return storage.ErrNotFound
   168  			}
   169  			return irrecoverable.NewExceptionf("could not check key: %w", err)
   170  		}
   171  
   172  		err = tx.Delete(key)
   173  		if err != nil {
   174  			return irrecoverable.NewExceptionf("could not delete item: %w", err)
   175  		}
   176  		return nil
   177  	}
   178  }
   179  
   180  // batchRemove removes entry under a given key in a write-batch.
   181  // if key doesn't exist, does nothing.
   182  // No errors are expected during normal operation.
   183  func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error {
   184  	return func(writeBatch *badger.WriteBatch) error {
   185  		err := writeBatch.Delete(key)
   186  		if err != nil {
   187  			return irrecoverable.NewExceptionf("could not batch delete data: %w", err)
   188  		}
   189  		return nil
   190  	}
   191  }
   192  
   193  // removeByPrefix removes all the entities if the prefix of the key matches the given prefix.
   194  // if no key matches, this is a no-op
   195  // No errors are expected during normal operation.
   196  func removeByPrefix(prefix []byte) func(*badger.Txn) error {
   197  	return func(tx *badger.Txn) error {
   198  		opts := badger.DefaultIteratorOptions
   199  		opts.AllVersions = false
   200  		opts.PrefetchValues = false
   201  		it := tx.NewIterator(opts)
   202  		defer it.Close()
   203  
   204  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   205  			key := it.Item().KeyCopy(nil)
   206  			err := tx.Delete(key)
   207  			if err != nil {
   208  				return irrecoverable.NewExceptionf("could not delete item with prefix: %w", err)
   209  			}
   210  		}
   211  
   212  		return nil
   213  	}
   214  }
   215  
   216  // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction.
   217  // no error would be returned if no key was found with the given prefix.
   218  // all error returned should be exception
   219  func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   220  	return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   221  
   222  		opts := badger.DefaultIteratorOptions
   223  		opts.AllVersions = false
   224  		opts.PrefetchValues = false
   225  		it := tx.NewIterator(opts)
   226  		defer it.Close()
   227  
   228  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   229  			key := it.Item().KeyCopy(nil)
   230  			err := writeBatch.Delete(key)
   231  			if err != nil {
   232  				return irrecoverable.NewExceptionf("could not delete item in batch: %w", err)
   233  			}
   234  		}
   235  		return nil
   236  	}
   237  }
   238  
   239  // retrieve will retrieve the binary data under the given key from the badger DB
   240  // and decode it into the given entity. The provided entity needs to be a
   241  // pointer to an initialized entity of the correct type.
   242  // Error returns:
   243  //   - storage.ErrNotFound if the key does not exist in the database
   244  //   - generic error in case of unexpected failure from the database layer, or failure
   245  //     to decode an existing database value
   246  func retrieve(key []byte, entity interface{}) func(*badger.Txn) error {
   247  	return func(tx *badger.Txn) error {
   248  
   249  		// retrieve the item from the key-value store
   250  		item, err := tx.Get(key)
   251  		if errors.Is(err, badger.ErrKeyNotFound) {
   252  			return storage.ErrNotFound
   253  		}
   254  		if err != nil {
   255  			return irrecoverable.NewExceptionf("could not load data: %w", err)
   256  		}
   257  
   258  		// get the value from the item
   259  		err = item.Value(func(val []byte) error {
   260  			err := msgpack.Unmarshal(val, entity)
   261  			return err
   262  		})
   263  		if err != nil {
   264  			return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   265  		}
   266  
   267  		return nil
   268  	}
   269  }
   270  
   271  // exists returns true if a key exists in the database.
   272  // No errors are expected during normal operation.
   273  func exists(key []byte, keyExists *bool) func(*badger.Txn) error {
   274  	return func(tx *badger.Txn) error {
   275  		_, err := tx.Get(key)
   276  		if err != nil {
   277  			// the key does not exist in the database
   278  			if errors.Is(err, badger.ErrKeyNotFound) {
   279  				*keyExists = false
   280  				return nil
   281  			}
   282  			// exception while checking for the key
   283  			return irrecoverable.NewExceptionf("could not load data: %w", err)
   284  		}
   285  
   286  		// the key does exist in the database
   287  		*keyExists = true
   288  		return nil
   289  	}
   290  }
   291  
   292  // checkFunc is called during key iteration through the badger DB in order to
   293  // check whether we should process the given key-value pair. It can be used to
   294  // avoid loading the value if its not of interest, as well as storing the key
   295  // for the current iteration step.
   296  type checkFunc func(key []byte) bool
   297  
   298  // createFunc returns a pointer to an initialized entity that we can potentially
   299  // decode the next value into during a badger DB iteration.
   300  type createFunc func() interface{}
   301  
   302  // handleFunc is a function that starts the processing of the current key-value
   303  // pair during a badger iteration. It should be called after the key was checked
   304  // and the entity was decoded.
   305  // No errors are expected during normal operation. Any errors will halt the iteration.
   306  type handleFunc func() error
   307  
   308  // iterationFunc is a function provided to our low-level iteration function that
   309  // allows us to pass badger efficiencies across badger boundaries. By calling it
   310  // for each iteration step, we can inject a function to check the key, a
   311  // function to create the decode target and a function to process the current
   312  // key-value pair. This a consumer of the API to decode when to skip the loading
   313  // of values, the initialization of entities and the processing.
   314  type iterationFunc func() (checkFunc, createFunc, handleFunc)
   315  
   316  // lookup is the default iteration function allowing us to collect a list of
   317  // entity IDs from an index.
   318  func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) {
   319  	*entityIDs = make([]flow.Identifier, 0, len(*entityIDs))
   320  	return func() (checkFunc, createFunc, handleFunc) {
   321  		check := func(key []byte) bool {
   322  			return true
   323  		}
   324  		var entityID flow.Identifier
   325  		create := func() interface{} {
   326  			return &entityID
   327  		}
   328  		handle := func() error {
   329  			*entityIDs = append(*entityIDs, entityID)
   330  			return nil
   331  		}
   332  		return check, create, handle
   333  	}
   334  }
   335  
   336  // withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load
   337  // the values when iterating over keys (ie. key-only iteration). Key-only iteration
   338  // is several order of magnitudes faster than regular iteration, because it involves
   339  // access to the LSM-tree only, which is usually resident entirely in RAM.
   340  func withPrefetchValuesFalse(options *badger.IteratorOptions) {
   341  	options.PrefetchValues = false
   342  }
   343  
   344  // iterate iterates over a range of keys defined by a start and end key. The
   345  // start key may be higher than the end key, in which case we iterate in
   346  // reverse order.
   347  //
   348  // The iteration range uses prefix-wise semantics. Specifically, all keys that
   349  // meet ANY of the following conditions are included in the iteration:
   350  //   - have a prefix equal to the start key OR
   351  //   - have a prefix equal to the end key OR
   352  //   - have a prefix that is lexicographically between start and end
   353  //
   354  // On each iteration, it will call the iteration function to initialize
   355  // functions specific to processing the given key-value pair.
   356  //
   357  // TODO: this function is unbounded – pass context.Context to this or calling functions to allow timing functions out.
   358  // No errors are expected during normal operation. Any errors returned by the
   359  // provided handleFunc will be propagated back to the caller of iterate.
   360  func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error {
   361  	return func(tx *badger.Txn) error {
   362  
   363  		// initialize the default options and comparison modifier for iteration
   364  		modifier := 1
   365  		options := badger.DefaultIteratorOptions
   366  		for _, apply := range opts {
   367  			apply(&options)
   368  		}
   369  
   370  		// In order to satisfy this function's prefix-wise inclusion semantics,
   371  		// we append 0xff bytes to the largest of start and end.
   372  		// This ensures Badger will seek to the largest key with that prefix
   373  		// for reverse iteration, thus including all keys with a prefix matching
   374  		// the starting key. It also enables us to detect boundary conditions by
   375  		// simple lexicographic comparison (ie. bytes.Compare) rather than
   376  		// explicitly comparing prefixes.
   377  		//
   378  		// See https://github.com/onflow/flow-go/pull/3310#issuecomment-618127494
   379  		// for discussion and more detail on this.
   380  
   381  		// If start is bigger than end, we have a backwards iteration:
   382  		// 1) We set the reverse option on the iterator, so we step through all
   383  		//    the keys backwards. This modifies the behaviour of Seek to go to
   384  		//    the first key that is less than or equal to the start key (as
   385  		//    opposed to greater than or equal in a regular iteration).
   386  		// 2) In order to satisfy this function's prefix-wise inclusion semantics,
   387  		//    we append a 0xff-byte suffix to the start key so the seek will go
   388  		// to the right place.
   389  		// 3) For a regular iteration, we break the loop upon hitting the first
   390  		//    item that has a key higher than the end prefix. In order to reverse
   391  		//    this, we use a modifier for the comparison that reverses the check
   392  		//    and makes it stop upon the first item lower than the end prefix.
   393  		if bytes.Compare(start, end) > 0 {
   394  			options.Reverse = true // make sure to go in reverse order
   395  			modifier = -1          // make sure to stop after end prefix
   396  			length := uint32(len(start))
   397  			diff := max - length
   398  			for i := uint32(0); i < diff; i++ {
   399  				start = append(start, 0xff)
   400  			}
   401  		} else {
   402  			// for forward iteration, add the 0xff-bytes suffix to the end
   403  			// prefix, to ensure we include all keys with that prefix before
   404  			// finishing.
   405  			length := uint32(len(end))
   406  			diff := max - length
   407  			for i := uint32(0); i < diff; i++ {
   408  				end = append(end, 0xff)
   409  			}
   410  		}
   411  
   412  		it := tx.NewIterator(options)
   413  		defer it.Close()
   414  
   415  		for it.Seek(start); it.Valid(); it.Next() {
   416  
   417  			item := it.Item()
   418  
   419  			key := item.Key()
   420  			// for forward iteration, check whether key > end, for backward
   421  			// iteration check whether key < end
   422  			if bytes.Compare(key, end)*modifier > 0 {
   423  				break
   424  			}
   425  
   426  			// initialize processing functions for iteration
   427  			check, create, handle := iteration()
   428  
   429  			// check if we should process the item at all
   430  			ok := check(key)
   431  			if !ok {
   432  				continue
   433  			}
   434  
   435  			// process the actual item
   436  			err := item.Value(func(val []byte) error {
   437  
   438  				// decode into the entity
   439  				entity := create()
   440  				err := msgpack.Unmarshal(val, entity)
   441  				if err != nil {
   442  					return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   443  				}
   444  
   445  				// process the entity
   446  				err = handle()
   447  				if err != nil {
   448  					return fmt.Errorf("could not handle entity: %w", err)
   449  				}
   450  
   451  				return nil
   452  			})
   453  			if err != nil {
   454  				return fmt.Errorf("could not process value: %w", err)
   455  			}
   456  		}
   457  
   458  		return nil
   459  	}
   460  }
   461  
   462  // traverse iterates over a range of keys defined by a prefix.
   463  //
   464  // The prefix must be shared by all keys in the iteration.
   465  //
   466  // On each iteration, it will call the iteration function to initialize
   467  // functions specific to processing the given key-value pair.
   468  func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error {
   469  	return func(tx *badger.Txn) error {
   470  		if len(prefix) == 0 {
   471  			return fmt.Errorf("prefix must not be empty")
   472  		}
   473  
   474  		opts := badger.DefaultIteratorOptions
   475  		// NOTE: this is an optimization only, it does not enforce that all
   476  		// results in the iteration have this prefix.
   477  		opts.Prefix = prefix
   478  
   479  		it := tx.NewIterator(opts)
   480  		defer it.Close()
   481  
   482  		// this is where we actually enforce that all results have the prefix
   483  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   484  
   485  			item := it.Item()
   486  
   487  			// initialize processing functions for iteration
   488  			check, create, handle := iteration()
   489  
   490  			// check if we should process the item at all
   491  			key := item.Key()
   492  			ok := check(key)
   493  			if !ok {
   494  				continue
   495  			}
   496  
   497  			// process the actual item
   498  			err := item.Value(func(val []byte) error {
   499  
   500  				// decode into the entity
   501  				entity := create()
   502  				err := msgpack.Unmarshal(val, entity)
   503  				if err != nil {
   504  					return irrecoverable.NewExceptionf("could not decode entity: %w", err)
   505  				}
   506  
   507  				// process the entity
   508  				err = handle()
   509  				if err != nil {
   510  					return fmt.Errorf("could not handle entity: %w", err)
   511  				}
   512  
   513  				return nil
   514  			})
   515  			if err != nil {
   516  				return fmt.Errorf("could not process value: %w", err)
   517  			}
   518  		}
   519  
   520  		return nil
   521  	}
   522  }
   523  
   524  // findHighestAtOrBelow searches for the highest key with the given prefix and a height
   525  // at or below the target height, and retrieves and decodes the value associated with the
   526  // key into the given entity.
   527  // If no key is found, the function returns storage.ErrNotFound.
   528  func findHighestAtOrBelow(
   529  	prefix []byte,
   530  	height uint64,
   531  	entity interface{},
   532  ) func(*badger.Txn) error {
   533  	return func(tx *badger.Txn) error {
   534  		if len(prefix) == 0 {
   535  			return fmt.Errorf("prefix must not be empty")
   536  		}
   537  
   538  		opts := badger.DefaultIteratorOptions
   539  		opts.Prefix = prefix
   540  		opts.Reverse = true
   541  
   542  		it := tx.NewIterator(opts)
   543  		defer it.Close()
   544  
   545  		it.Seek(append(prefix, b(height)...))
   546  
   547  		if !it.Valid() {
   548  			return storage.ErrNotFound
   549  		}
   550  
   551  		return it.Item().Value(func(val []byte) error {
   552  			err := msgpack.Unmarshal(val, entity)
   553  			if err != nil {
   554  				return fmt.Errorf("could not decode entity: %w", err)
   555  			}
   556  			return nil
   557  		})
   558  	}
   559  }
   560  
   561  // Fail returns a DB operation function that always fails with the given error.
   562  func Fail(err error) func(*badger.Txn) error {
   563  	return func(_ *badger.Txn) error {
   564  		return err
   565  	}
   566  }