github.com/koko1123/flow-go-1@v0.29.6/storage/badger/operation/common.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package operation
     4  
     5  import (
     6  	"bytes"
     7  	"errors"
     8  	"fmt"
     9  
    10  	"github.com/dgraph-io/badger/v3"
    11  	"github.com/vmihailenco/msgpack/v4"
    12  
    13  	"github.com/koko1123/flow-go-1/model/flow"
    14  	"github.com/koko1123/flow-go-1/storage"
    15  )
    16  
    17  // batchWrite will encode the given entity using msgpack and will upsert the resulting
    18  // binary data in the badger wrote batch under the provided key - if the value already exists
    19  // in the database it will be overridden.
    20  // No errors are expected during normal operation.
    21  func batchWrite(key []byte, entity interface{}) func(writeBatch *badger.WriteBatch) error {
    22  	return func(writeBatch *badger.WriteBatch) error {
    23  
    24  		// update the maximum key size if the inserted key is bigger
    25  		if uint32(len(key)) > max {
    26  			max = uint32(len(key))
    27  			err := SetMax(writeBatch)
    28  			if err != nil {
    29  				return fmt.Errorf("could not update max tracker: %w", err)
    30  			}
    31  		}
    32  
    33  		// serialize the entity data
    34  		val, err := msgpack.Marshal(entity)
    35  		if err != nil {
    36  			return fmt.Errorf("could not encode entity: %w", err)
    37  		}
    38  
    39  		// persist the entity data into the DB
    40  		err = writeBatch.Set(key, val)
    41  		if err != nil {
    42  			return fmt.Errorf("could not store data: %w", err)
    43  		}
    44  		return nil
    45  	}
    46  }
    47  
    48  // insert will encode the given entity using msgpack and will insert the resulting
    49  // binary data in the badger DB under the provided key. It will error if the
    50  // key already exists.
    51  // Error returns:
    52  //   - storage.ErrAlreadyExists if the key already exists in the database.
    53  //   - generic error in case of unexpected failure from the database layer or
    54  //     encoding failure.
    55  func insert(key []byte, entity interface{}) func(*badger.Txn) error {
    56  	return func(tx *badger.Txn) error {
    57  
    58  		// update the maximum key size if the inserted key is bigger
    59  		if uint32(len(key)) > max {
    60  			max = uint32(len(key))
    61  			err := SetMax(tx)
    62  			if err != nil {
    63  				return fmt.Errorf("could not update max tracker: %w", err)
    64  			}
    65  		}
    66  
    67  		// check if the key already exists in the db
    68  		_, err := tx.Get(key)
    69  		if err == nil {
    70  			return storage.ErrAlreadyExists
    71  		}
    72  
    73  		if !errors.Is(err, badger.ErrKeyNotFound) {
    74  			return fmt.Errorf("could not retrieve key: %w", err)
    75  		}
    76  
    77  		// serialize the entity data
    78  		val, err := msgpack.Marshal(entity)
    79  		if err != nil {
    80  			return fmt.Errorf("could not encode entity: %w", err)
    81  		}
    82  
    83  		// persist the entity data into the DB
    84  		err = tx.Set(key, val)
    85  		if err != nil {
    86  			return fmt.Errorf("could not store data: %w", err)
    87  		}
    88  		return nil
    89  	}
    90  }
    91  
    92  // update will encode the given entity with MsgPack and update the binary data
    93  // under the given key in the badger DB. The key must already exist.
    94  // Error returns:
    95  //   - storage.ErrNotFound if the key does not already exist in the database.
    96  //   - generic error in case of unexpected failure from the database layer or
    97  //     encoding failure.
    98  func update(key []byte, entity interface{}) func(*badger.Txn) error {
    99  	return func(tx *badger.Txn) error {
   100  
   101  		// retrieve the item from the key-value store
   102  		_, err := tx.Get(key)
   103  		if errors.Is(err, badger.ErrKeyNotFound) {
   104  			return storage.ErrNotFound
   105  		}
   106  		if err != nil {
   107  			return fmt.Errorf("could not check key: %w", err)
   108  		}
   109  
   110  		// serialize the entity data
   111  		val, err := msgpack.Marshal(entity)
   112  		if err != nil {
   113  			return fmt.Errorf("could not encode entity: %w", err)
   114  		}
   115  
   116  		// persist the entity data into the DB
   117  		err = tx.Set(key, val)
   118  		if err != nil {
   119  			return fmt.Errorf("could not replace data: %w", err)
   120  		}
   121  
   122  		return nil
   123  	}
   124  }
   125  
   126  // upsert will encode the given entity with MsgPack and upsert the binary data
   127  // under the given key in the badger DB.
   128  func upsert(key []byte, entity interface{}) func(*badger.Txn) error {
   129  	return func(tx *badger.Txn) error {
   130  		// update the maximum key size if the inserted key is bigger
   131  		if uint32(len(key)) > max {
   132  			max = uint32(len(key))
   133  			err := SetMax(tx)
   134  			if err != nil {
   135  				return fmt.Errorf("could not update max tracker: %w", err)
   136  			}
   137  		}
   138  
   139  		// serialize the entity data
   140  		val, err := msgpack.Marshal(entity)
   141  		if err != nil {
   142  			return fmt.Errorf("could not encode entity: %w", err)
   143  		}
   144  
   145  		// persist the entity data into the DB
   146  		err = tx.Set(key, val)
   147  		if err != nil {
   148  			return fmt.Errorf("could not upsert data: %w", err)
   149  		}
   150  
   151  		return nil
   152  	}
   153  }
   154  
   155  // remove removes the entity with the given key, if it exists. If it doesn't
   156  // exist, this is a no-op.
   157  // Error returns:
   158  // * storage.ErrNotFound if the key to delete does not exist.
   159  // * generic error in case of unexpected database error
   160  func remove(key []byte) func(*badger.Txn) error {
   161  	return func(tx *badger.Txn) error {
   162  		// retrieve the item from the key-value store
   163  		_, err := tx.Get(key)
   164  		if errors.Is(err, badger.ErrKeyNotFound) {
   165  			return storage.ErrNotFound
   166  		}
   167  		if err != nil {
   168  			return fmt.Errorf("could not check key: %w", err)
   169  		}
   170  
   171  		err = tx.Delete(key)
   172  		return err
   173  	}
   174  }
   175  
   176  // batchRemove removes entry under a given key in a write-batch.
   177  // if key doesn't exist, does nothing.
   178  // No errors are expected during normal operation.
   179  func batchRemove(key []byte) func(writeBatch *badger.WriteBatch) error {
   180  	return func(writeBatch *badger.WriteBatch) error {
   181  		err := writeBatch.Delete(key)
   182  		if err != nil {
   183  			return fmt.Errorf("could not batch delete data: %w", err)
   184  		}
   185  		return nil
   186  	}
   187  }
   188  
   189  // removeByPrefix removes all the entities if the prefix of the key matches the given prefix.
   190  // if no key matches, this is a no-op
   191  // No errors are expected during normal operation.
   192  func removeByPrefix(prefix []byte) func(*badger.Txn) error {
   193  	return func(tx *badger.Txn) error {
   194  		opts := badger.DefaultIteratorOptions
   195  		opts.AllVersions = false
   196  		opts.PrefetchValues = false
   197  		it := tx.NewIterator(opts)
   198  		defer it.Close()
   199  
   200  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   201  			key := it.Item().KeyCopy(nil)
   202  			err := tx.Delete(key)
   203  			if err != nil {
   204  				return err
   205  			}
   206  		}
   207  
   208  		return nil
   209  	}
   210  }
   211  
   212  // batchRemoveByPrefix removes all items under the keys match the given prefix in a batch write transaction.
   213  // no error would be returned if no key was found with the given prefix.
   214  // all error returned should be exception
   215  func batchRemoveByPrefix(prefix []byte) func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   216  	return func(tx *badger.Txn, writeBatch *badger.WriteBatch) error {
   217  
   218  		opts := badger.DefaultIteratorOptions
   219  		opts.AllVersions = false
   220  		opts.PrefetchValues = false
   221  		it := tx.NewIterator(opts)
   222  		defer it.Close()
   223  
   224  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   225  			key := it.Item().KeyCopy(nil)
   226  			err := writeBatch.Delete(key)
   227  			if err != nil {
   228  				return err
   229  			}
   230  		}
   231  		return nil
   232  	}
   233  }
   234  
   235  // retrieve will retrieve the binary data under the given key from the badger DB
   236  // and decode it into the given entity. The provided entity needs to be a
   237  // pointer to an initialized entity of the correct type.
   238  // Error returns:
   239  //   - storage.ErrNotFound if the key does not exist in the database
   240  //   - generic error in case of unexpected failure from the database layer, or failure
   241  //     to decode an existing database value
   242  func retrieve(key []byte, entity interface{}) func(*badger.Txn) error {
   243  	return func(tx *badger.Txn) error {
   244  
   245  		// retrieve the item from the key-value store
   246  		item, err := tx.Get(key)
   247  		if errors.Is(err, badger.ErrKeyNotFound) {
   248  			return storage.ErrNotFound
   249  		}
   250  		if err != nil {
   251  			return fmt.Errorf("could not load data: %w", err)
   252  		}
   253  
   254  		// get the value from the item
   255  		err = item.Value(func(val []byte) error {
   256  			err := msgpack.Unmarshal(val, entity)
   257  			return err
   258  		})
   259  		if err != nil {
   260  			return fmt.Errorf("could not decode entity: %w", err)
   261  		}
   262  
   263  		return nil
   264  	}
   265  }
   266  
   267  // checkFunc is called during key iteration through the badger DB in order to
   268  // check whether we should process the given key-value pair. It can be used to
   269  // avoid loading the value if its not of interest, as well as storing the key
   270  // for the current iteration step.
   271  type checkFunc func(key []byte) bool
   272  
   273  // createFunc returns a pointer to an initialized entity that we can potentially
   274  // decode the next value into during a badger DB iteration.
   275  type createFunc func() interface{}
   276  
   277  // handleFunc is a function that starts the processing of the current key-value
   278  // pair during a badger iteration. It should be called after the key was checked
   279  // and the entity was decoded.
   280  // No errors are expected during normal operation. Any errors will halt the iteration.
   281  type handleFunc func() error
   282  
   283  // iterationFunc is a function provided to our low-level iteration function that
   284  // allows us to pass badger efficiencies across badger boundaries. By calling it
   285  // for each iteration step, we can inject a function to check the key, a
   286  // function to create the decode target and a function to process the current
   287  // key-value pair. This a consumer of the API to decode when to skip the loading
   288  // of values, the initialization of entities and the processing.
   289  type iterationFunc func() (checkFunc, createFunc, handleFunc)
   290  
   291  // lookup is the default iteration function allowing us to collect a list of
   292  // entity IDs from an index.
   293  func lookup(entityIDs *[]flow.Identifier) func() (checkFunc, createFunc, handleFunc) {
   294  	*entityIDs = make([]flow.Identifier, 0, len(*entityIDs))
   295  	return func() (checkFunc, createFunc, handleFunc) {
   296  		check := func(key []byte) bool {
   297  			return true
   298  		}
   299  		var entityID flow.Identifier
   300  		create := func() interface{} {
   301  			return &entityID
   302  		}
   303  		handle := func() error {
   304  			*entityIDs = append(*entityIDs, entityID)
   305  			return nil
   306  		}
   307  		return check, create, handle
   308  	}
   309  }
   310  
   311  // withPrefetchValuesFalse configures a Badger iteration to NOT preemptively load
   312  // the values when iterating over keys (ie. key-only iteration). Key-only iteration
   313  // is several order of magnitudes faster than regular iteration, because it involves
   314  // access to the LSM-tree only, which is usually resident entirely in RAM.
   315  func withPrefetchValuesFalse(options *badger.IteratorOptions) {
   316  	options.PrefetchValues = false
   317  }
   318  
   319  // iterate iterates over a range of keys defined by a start and end key. The
   320  // start key may be higher than the end key, in which case we iterate in
   321  // reverse order.
   322  //
   323  // The iteration range uses prefix-wise semantics. Specifically, all keys that
   324  // meet ANY of the following conditions are included in the iteration:
   325  //   - have a prefix equal to the start key OR
   326  //   - have a prefix equal to the end key OR
   327  //   - have a prefix that is lexicographically between start and end
   328  //
   329  // On each iteration, it will call the iteration function to initialize
   330  // functions specific to processing the given key-value pair.
   331  //
   332  // TODO: this function is unbounded – pass context.Context to this or calling
   333  // functions to allow timing functions out.
   334  // No errors are expected during normal operation. Any errors returned by the
   335  // provided handleFunc will be propagated back to the caller of iterate.
   336  func iterate(start []byte, end []byte, iteration iterationFunc, opts ...func(*badger.IteratorOptions)) func(*badger.Txn) error {
   337  	return func(tx *badger.Txn) error {
   338  
   339  		// initialize the default options and comparison modifier for iteration
   340  		modifier := 1
   341  		options := badger.DefaultIteratorOptions
   342  		for _, apply := range opts {
   343  			apply(&options)
   344  		}
   345  
   346  		// In order to satisfy this function's prefix-wise inclusion semantics,
   347  		// we append 0xff bytes to the largest of start and end.
   348  		// This ensures Badger will seek to the largest key with that prefix
   349  		// for reverse iteration, thus including all keys with a prefix matching
   350  		// the starting key. It also enables us to detect boundary conditions by
   351  		// simple lexicographic comparison (ie. bytes.Compare) rather than
   352  		// explicitly comparing prefixes.
   353  		//
   354  		// See https://github.com/koko1123/flow-go-1/pull/3310#issuecomment-618127494
   355  		// for discussion and more detail on this.
   356  
   357  		// If start is bigger than end, we have a backwards iteration:
   358  		// 1) We set the reverse option on the iterator, so we step through all
   359  		//    the keys backwards. This modifies the behaviour of Seek to go to
   360  		//    the first key that is less than or equal to the start key (as
   361  		//    opposed to greater than or equal in a regular iteration).
   362  		// 2) In order to satisfy this function's prefix-wise inclusion semantics,
   363  		//    we append a 0xff-byte suffix to the start key so the seek will go
   364  		// to the right place.
   365  		// 3) For a regular iteration, we break the loop upon hitting the first
   366  		//    item that has a key higher than the end prefix. In order to reverse
   367  		//    this, we use a modifier for the comparison that reverses the check
   368  		//    and makes it stop upon the first item lower than the end prefix.
   369  		if bytes.Compare(start, end) > 0 {
   370  			options.Reverse = true // make sure to go in reverse order
   371  			modifier = -1          // make sure to stop after end prefix
   372  			length := uint32(len(start))
   373  			diff := max - length
   374  			for i := uint32(0); i < diff; i++ {
   375  				start = append(start, 0xff)
   376  			}
   377  		} else {
   378  			// for forward iteration, add the 0xff-bytes suffix to the end
   379  			// prefix, to ensure we include all keys with that prefix before
   380  			// finishing.
   381  			length := uint32(len(end))
   382  			diff := max - length
   383  			for i := uint32(0); i < diff; i++ {
   384  				end = append(end, 0xff)
   385  			}
   386  		}
   387  
   388  		it := tx.NewIterator(options)
   389  		defer it.Close()
   390  
   391  		for it.Seek(start); it.Valid(); it.Next() {
   392  
   393  			item := it.Item()
   394  
   395  			key := item.Key()
   396  			// for forward iteration, check whether key > end, for backward
   397  			// iteration check whether key < end
   398  			if bytes.Compare(key, end)*modifier > 0 {
   399  				break
   400  			}
   401  
   402  			// initialize processing functions for iteration
   403  			check, create, handle := iteration()
   404  
   405  			// check if we should process the item at all
   406  			ok := check(key)
   407  			if !ok {
   408  				continue
   409  			}
   410  
   411  			// process the actual item
   412  			err := item.Value(func(val []byte) error {
   413  
   414  				// decode into the entity
   415  				entity := create()
   416  				err := msgpack.Unmarshal(val, entity)
   417  				if err != nil {
   418  					return fmt.Errorf("could not decode entity: %w", err)
   419  				}
   420  
   421  				// process the entity
   422  				err = handle()
   423  				if err != nil {
   424  					return fmt.Errorf("could not handle entity: %w", err)
   425  				}
   426  
   427  				return nil
   428  			})
   429  			if err != nil {
   430  				return fmt.Errorf("could not process value: %w", err)
   431  			}
   432  		}
   433  
   434  		return nil
   435  	}
   436  }
   437  
   438  // traverse iterates over a range of keys defined by a prefix.
   439  //
   440  // The prefix must be shared by all keys in the iteration.
   441  //
   442  // On each iteration, it will call the iteration function to initialize
   443  // functions specific to processing the given key-value pair.
   444  func traverse(prefix []byte, iteration iterationFunc) func(*badger.Txn) error {
   445  	return func(tx *badger.Txn) error {
   446  		if len(prefix) == 0 {
   447  			return fmt.Errorf("prefix must not be empty")
   448  		}
   449  
   450  		opts := badger.DefaultIteratorOptions
   451  		// NOTE: this is an optimization only, it does not enforce that all
   452  		// results in the iteration have this prefix.
   453  		opts.Prefix = prefix
   454  
   455  		it := tx.NewIterator(opts)
   456  		defer it.Close()
   457  
   458  		// this is where we actually enforce that all results have the prefix
   459  		for it.Seek(prefix); it.ValidForPrefix(prefix); it.Next() {
   460  
   461  			item := it.Item()
   462  
   463  			// initialize processing functions for iteration
   464  			check, create, handle := iteration()
   465  
   466  			// check if we should process the item at all
   467  			key := item.Key()
   468  			ok := check(key)
   469  			if !ok {
   470  				continue
   471  			}
   472  
   473  			// process the actual item
   474  			err := item.Value(func(val []byte) error {
   475  
   476  				// decode into the entity
   477  				entity := create()
   478  				err := msgpack.Unmarshal(val, entity)
   479  				if err != nil {
   480  					return fmt.Errorf("could not decode entity: %w", err)
   481  				}
   482  
   483  				// process the entity
   484  				err = handle()
   485  				if err != nil {
   486  					return fmt.Errorf("could not handle entity: %w", err)
   487  				}
   488  
   489  				return nil
   490  			})
   491  			if err != nil {
   492  				return fmt.Errorf("could not process value: %w", err)
   493  			}
   494  		}
   495  
   496  		return nil
   497  	}
   498  }
   499  
   500  // Fail returns a DB operation function that always fails with the given error.
   501  func Fail(err error) func(*badger.Txn) error {
   502  	return func(_ *badger.Txn) error {
   503  		return err
   504  	}
   505  }