github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/iterator.go (about)

     1  /*
     2   * Copyright 2017 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package badger
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"math"
    23  	"sort"
    24  	"sync/atomic"
    25  
    26  	"github.com/dgryski/go-farm"
    27  	"github.com/pingcap/badger/table"
    28  	"github.com/pingcap/badger/table/memtable"
    29  	"github.com/pingcap/badger/y"
    30  )
    31  
    32  // Item is returned during iteration. Both the Key() and Value() output is only valid until
    33  // iterator.Next() is called.
    34  type Item struct {
    35  	err      error
    36  	db       *DB
    37  	key      y.Key
    38  	vptr     []byte
    39  	meta     byte // We need to store meta to know about bitValuePointer.
    40  	userMeta []byte
    41  	slice    *y.Slice
    42  	next     *Item
    43  	txn      *Txn
    44  }
    45  
    46  // String returns a string representation of Item
    47  func (item *Item) String() string {
    48  	return fmt.Sprintf("key=%q, version=%d, meta=%x", item.Key(), item.Version(), item.meta)
    49  }
    50  
    51  // Key returns the key.
    52  //
    53  // Key is only valid as long as item is valid, or transaction is valid.  If you need to use it
    54  // outside its validity, please use KeyCopy
    55  func (item *Item) Key() []byte {
    56  	return item.key.UserKey
    57  }
    58  
    59  // KeyCopy returns a copy of the key of the item, writing it to dst slice.
    60  // If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
    61  // returned.
    62  func (item *Item) KeyCopy(dst []byte) []byte {
    63  	return y.SafeCopy(dst, item.key.UserKey)
    64  }
    65  
    66  // Version returns the commit timestamp of the item.
    67  func (item *Item) Version() uint64 {
    68  	return item.key.Version
    69  }
    70  
    71  // IsEmpty checks if the value is empty.
    72  func (item *Item) IsEmpty() bool {
    73  	return len(item.vptr) == 0
    74  }
    75  
    76  // Value retrieves the value of the item from the value log.
    77  //
    78  // This method must be called within a transaction. Calling it outside a
    79  // transaction is considered undefined behavior. If an iterator is being used,
    80  // then Item.Value() is defined in the current iteration only, because items are
    81  // reused.
    82  //
    83  // If you need to use a value outside a transaction, please use Item.ValueCopy
    84  // instead, or copy it yourself. Value might change once discard or commit is called.
    85  // Use ValueCopy if you want to do a Set after Get.
    86  func (item *Item) Value() ([]byte, error) {
    87  	if item.meta&bitValuePointer > 0 {
    88  		if item.slice == nil {
    89  			item.slice = new(y.Slice)
    90  		}
    91  		if item.txn.blobCache == nil {
    92  			item.txn.blobCache = map[uint32]*blobCache{}
    93  		}
    94  		return item.db.blobManger.read(item.vptr, item.slice, item.txn.blobCache)
    95  	}
    96  	return item.vptr, nil
    97  }
    98  
    99  // ValueSize returns the size of the value without the cost of retrieving the value.
   100  func (item *Item) ValueSize() int {
   101  	if item.meta&bitValuePointer > 0 {
   102  		var bp blobPointer
   103  		bp.decode(item.vptr)
   104  		return int(bp.length)
   105  	}
   106  	return len(item.vptr)
   107  }
   108  
   109  // ValueCopy returns a copy of the value of the item from the value log, writing it to dst slice.
   110  // If nil is passed, or capacity of dst isn't sufficient, a new slice would be allocated and
   111  // returned. Tip: It might make sense to reuse the returned slice as dst argument for the next call.
   112  //
   113  // This function is useful in long running iterate/update transactions to avoid a write deadlock.
   114  // See Github issue: https://github.com/pingcap/badger/issues/315
   115  func (item *Item) ValueCopy(dst []byte) ([]byte, error) {
   116  	buf, err := item.Value()
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	return y.SafeCopy(dst, buf), nil
   121  }
   122  
   123  func (item *Item) hasValue() bool {
   124  	if item.meta == 0 && item.vptr == nil {
   125  		// key not found
   126  		return false
   127  	}
   128  	return true
   129  }
   130  
   131  // IsDeleted returns true if item contains deleted or expired value.
   132  func (item *Item) IsDeleted() bool {
   133  	return isDeleted(item.meta)
   134  }
   135  
   136  // EstimatedSize returns approximate size of the key-value pair.
   137  //
   138  // This can be called while iterating through a store to quickly estimate the
   139  // size of a range of key-value pairs (without fetching the corresponding
   140  // values).
   141  func (item *Item) EstimatedSize() int64 {
   142  	if !item.hasValue() {
   143  		return 0
   144  	}
   145  	return int64(item.key.Len() + len(item.vptr))
   146  }
   147  
   148  // UserMeta returns the userMeta set by the user. Typically, this byte, optionally set by the user
   149  // is used to interpret the value.
   150  func (item *Item) UserMeta() []byte {
   151  	return item.userMeta
   152  }
   153  
   154  // IteratorOptions is used to set options when iterating over Badger key-value
   155  // stores.
   156  //
   157  // This package provides DefaultIteratorOptions which contains options that
   158  // should work for most applications. Consider using that as a starting point
   159  // before customizing it for your own needs.
   160  type IteratorOptions struct {
   161  	Reverse     bool // Direction of iteration. False is forward, true is backward.
   162  	AllVersions bool // Fetch all valid versions of the same key.
   163  
   164  	// StartKey and EndKey are used to prune non-overlapping table iterators.
   165  	// They are not boundary limits, the EndKey is exclusive.
   166  	StartKey y.Key
   167  	EndKey   y.Key
   168  
   169  	internalAccess bool // Used to allow internal access to badger keys.
   170  }
   171  
   172  func (opts *IteratorOptions) hasRange() bool {
   173  	return !opts.StartKey.IsEmpty() && !opts.EndKey.IsEmpty()
   174  }
   175  
   176  func (opts *IteratorOptions) OverlapPending(it *pendingWritesIterator) bool {
   177  	if it == nil {
   178  		return false
   179  	}
   180  	if !opts.hasRange() {
   181  		return true
   182  	}
   183  	if opts.EndKey.Compare(it.entries[0].Key) <= 0 {
   184  		return false
   185  	}
   186  	if opts.StartKey.Compare(it.entries[len(it.entries)-1].Key) > 0 {
   187  		return false
   188  	}
   189  	return true
   190  }
   191  
   192  func (opts *IteratorOptions) OverlapMemTable(t *memtable.Table) bool {
   193  	if t.Empty() {
   194  		return false
   195  	}
   196  	if !opts.hasRange() {
   197  		return true
   198  	}
   199  	iter := t.NewIterator(false)
   200  	defer iter.Close()
   201  	iter.Seek(opts.StartKey.UserKey)
   202  	if !iter.Valid() {
   203  		return false
   204  	}
   205  	if bytes.Compare(iter.Key().UserKey, opts.EndKey.UserKey) >= 0 {
   206  		return false
   207  	}
   208  	return true
   209  }
   210  
   211  func (opts *IteratorOptions) OverlapTable(t table.Table) bool {
   212  	if !opts.hasRange() {
   213  		return true
   214  	}
   215  	return t.HasOverlap(opts.StartKey, opts.EndKey, false)
   216  }
   217  
   218  func (opts *IteratorOptions) OverlapTables(tables []table.Table) []table.Table {
   219  	if len(tables) == 0 {
   220  		return nil
   221  	}
   222  	if !opts.hasRange() {
   223  		return tables
   224  	}
   225  	startIdx := sort.Search(len(tables), func(i int) bool {
   226  		t := tables[i]
   227  		return opts.StartKey.Compare(t.Biggest()) <= 0
   228  	})
   229  	if startIdx == len(tables) {
   230  		return nil
   231  	}
   232  	tables = tables[startIdx:]
   233  	endIdx := sort.Search(len(tables), func(i int) bool {
   234  		t := tables[i]
   235  		return t.Smallest().Compare(opts.EndKey) >= 0
   236  	})
   237  	tables = tables[:endIdx]
   238  	overlapTables := make([]table.Table, 0, 8)
   239  	for _, t := range tables {
   240  		if opts.OverlapTable(t) {
   241  			overlapTables = append(overlapTables, t)
   242  		}
   243  	}
   244  	return overlapTables
   245  }
   246  
   247  // DefaultIteratorOptions contains default options when iterating over Badger key-value stores.
   248  var DefaultIteratorOptions = IteratorOptions{
   249  	Reverse:     false,
   250  	AllVersions: false,
   251  }
   252  
   253  // Iterator helps iterating over the KV pairs in a lexicographically sorted order.
   254  type Iterator struct {
   255  	iitr   y.Iterator
   256  	txn    *Txn
   257  	readTs uint64
   258  
   259  	opt   IteratorOptions
   260  	item  *Item
   261  	itBuf Item
   262  	vs    y.ValueStruct
   263  
   264  	closed bool
   265  }
   266  
   267  // NewIterator returns a new iterator. Depending upon the options, either only keys, or both
   268  // key-value pairs would be fetched. The keys are returned in lexicographically sorted order.
   269  // Avoid long running iterations in update transactions.
   270  func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
   271  	atomic.AddInt32(&txn.numIterators, 1)
   272  
   273  	tables := txn.db.getMemTables()
   274  	if !opt.StartKey.IsEmpty() {
   275  		opt.StartKey.Version = math.MaxUint64
   276  	}
   277  	if !opt.EndKey.IsEmpty() {
   278  		opt.EndKey.Version = math.MaxUint64
   279  	}
   280  	var iters []y.Iterator
   281  	if itr := txn.newPendingWritesIterator(opt.Reverse); opt.OverlapPending(itr) {
   282  		iters = append(iters, itr)
   283  	}
   284  	for i := 0; i < len(tables); i++ {
   285  		if opt.OverlapMemTable(tables[i]) {
   286  			iters = append(iters, tables[i].NewIterator(opt.Reverse))
   287  		}
   288  	}
   289  	iters = txn.db.lc.appendIterators(iters, &opt) // This will increment references.
   290  	res := &Iterator{
   291  		txn:    txn,
   292  		iitr:   table.NewMergeIterator(iters, opt.Reverse),
   293  		opt:    opt,
   294  		readTs: txn.readTs,
   295  	}
   296  	res.itBuf.db = txn.db
   297  	res.itBuf.txn = txn
   298  	res.itBuf.slice = new(y.Slice)
   299  	return res
   300  }
   301  
   302  // Item returns pointer to the current key-value pair.
   303  // This item is only valid until it.Next() gets called.
   304  func (it *Iterator) Item() *Item {
   305  	tx := it.txn
   306  	if tx.update {
   307  		// Track reads if this is an update txn.
   308  		tx.reads = append(tx.reads, farm.Fingerprint64(it.item.Key()))
   309  	}
   310  	return it.item
   311  }
   312  
   313  // Valid returns false when iteration is done.
   314  func (it *Iterator) Valid() bool { return it.item != nil }
   315  
   316  // ValidForPrefix returns false when iteration is done
   317  // or when the current key is not prefixed by the specified prefix.
   318  func (it *Iterator) ValidForPrefix(prefix []byte) bool {
   319  	return it.item != nil && bytes.HasPrefix(it.item.key.UserKey, prefix)
   320  }
   321  
   322  // Close would close the iterator. It is important to call this when you're done with iteration.
   323  func (it *Iterator) Close() {
   324  	if it.closed {
   325  		return
   326  	}
   327  	it.closed = true
   328  	it.iitr.Close()
   329  	atomic.AddInt32(&it.txn.numIterators, -1)
   330  }
   331  
   332  // Next would advance the iterator by one. Always check it.Valid() after a Next()
   333  // to ensure you have access to a valid it.Item().
   334  func (it *Iterator) Next() {
   335  	if it.opt.AllVersions && it.Valid() && it.iitr.NextVersion() {
   336  		it.updateItem()
   337  		return
   338  	}
   339  	it.iitr.Next()
   340  	it.parseItem()
   341  	return
   342  }
   343  
   344  func (it *Iterator) updateItem() {
   345  	it.iitr.FillValue(&it.vs)
   346  	item := &it.itBuf
   347  	item.key = it.iitr.Key()
   348  	item.meta = it.vs.Meta
   349  	item.userMeta = it.vs.UserMeta
   350  	item.vptr = it.vs.Value
   351  	it.item = item
   352  }
   353  
   354  func (it *Iterator) parseItem() {
   355  	iitr := it.iitr
   356  	for iitr.Valid() {
   357  		key := iitr.Key()
   358  		if !it.opt.internalAccess && key.UserKey[0] == '!' {
   359  			iitr.Next()
   360  			continue
   361  		}
   362  		if key.Version > it.readTs {
   363  			if !y.SeekToVersion(iitr, it.readTs) {
   364  				iitr.Next()
   365  				continue
   366  			}
   367  		}
   368  		it.updateItem()
   369  		if !it.opt.AllVersions && isDeleted(it.vs.Meta) {
   370  			iitr.Next()
   371  			continue
   372  		}
   373  		return
   374  	}
   375  	it.item = nil
   376  }
   377  
   378  func isDeleted(meta byte) bool {
   379  	return meta&bitDelete > 0
   380  }
   381  
   382  // Seek would seek to the provided key if present. If absent, it would seek to the next smallest key
   383  // greater than provided if iterating in the forward direction. Behavior would be reversed is
   384  // iterating backwards.
   385  func (it *Iterator) Seek(key []byte) {
   386  	if !it.opt.Reverse {
   387  		it.iitr.Seek(key)
   388  	} else {
   389  		if len(key) == 0 {
   390  			it.iitr.Rewind()
   391  		} else {
   392  			it.iitr.Seek(key)
   393  		}
   394  	}
   395  	it.parseItem()
   396  }
   397  
   398  // Rewind would rewind the iterator cursor all the way to zero-th position, which would be the
   399  // smallest key if iterating forward, and largest if iterating backward. It does not keep track of
   400  // whether the cursor started with a Seek().
   401  func (it *Iterator) Rewind() {
   402  	it.iitr.Rewind()
   403  	it.parseItem()
   404  }
   405  
   406  func (it *Iterator) SetAllVersions(allVersions bool) {
   407  	it.opt.AllVersions = allVersions
   408  }