github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/reader_iter_single_lvl.go

     1  // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"unsafe"
    11  
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/cockroachdb/pebble/internal/invariants"
    14  	"github.com/cockroachdb/pebble/objstorage"
    15  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    16  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
    17  )
    18  
    19  // singleLevelIterator iterates over an entire table of data. To seek for a given
    20  // key, it first looks in the index for the block that contains that key, and then
    21  // looks inside that block.
    22  type singleLevelIterator struct {
    23  	ctx context.Context
    24  	cmp Compare
    25  	// Global lower/upper bound for the iterator.
    26  	lower []byte
    27  	upper []byte
    28  	bpfs  *BlockPropertiesFilterer
    29  	// Per-block lower/upper bound. Nil if the bound does not apply to the block
    30  	// because we determined the block lies completely within the bound.
    31  	blockLower []byte
    32  	blockUpper []byte
    33  	reader     *Reader
    34  	// vState will be set iff the iterator is constructed for virtual sstable
    35  	// iteration.
    36  	vState *virtualState
    37  	// endKeyInclusive is set to force the iterator to treat the upper field as
    38  	// inclusive while iterating instead of exclusive.
    39  	endKeyInclusive bool
    40  	index           blockIter
    41  	data            blockIter
    42  	dataRH          objstorage.ReadHandle
    43  	dataRHPrealloc  objstorageprovider.PreallocatedReadHandle
    44  	// dataBH refers to the last data block that the iterator considered
    45  	// loading. It may not actually have loaded the block, due to an error or
    46  	// because it was considered irrelevant.
    47  	dataBH   BlockHandle
    48  	vbReader *valueBlockReader
    49  	// vbRH is the read handle for value blocks, which are in a different
    50  	// part of the sstable than data blocks.
    51  	vbRH         objstorage.ReadHandle
    52  	vbRHPrealloc objstorageprovider.PreallocatedReadHandle
    53  	err          error
    54  	closeHook    func(i Iterator) error
    55  	stats        *base.InternalIteratorStats
    56  	bufferPool   *BufferPool
    57  
    58  	// boundsCmp and positionedUsingLatestBounds are for optimizing iteration
    59  	// that uses multiple adjacent bounds. The seek after setting a new bound
    60  	// can use the fact that the iterator is either within the previous bounds
    61  	// or exactly one key before or after the bounds. If the new bounds is
    62  	// after/before the previous bounds, and we are already positioned at a
    63  	// block that is relevant for the new bounds, we can try to first position
    64  	// using Next/Prev (repeatedly) instead of doing a more expensive seek.
    65  	//
    66  	// When there are wide files at higher levels that match the bounds
    67  	// but don't have any data for the bound, we will already be
    68  	// positioned at the key beyond the bounds and won't need to do much
    69  	// work -- given that most data is in L6, such files are likely to
    70  	// dominate the performance of the mergingIter, and may be the main
    71  	// benefit of this performance optimization (of course it also helps
    72  	// when the file that has the data has successive seeks that stay in
    73  	// the same block).
    74  	//
    75  	// Specifically, boundsCmp captures the relationship between the previous
    76  	// and current bounds, if the iterator had been positioned after setting
    77  	// the previous bounds. If it was not positioned, i.e., Seek/First/Last
    78  	// were not called, we don't know where it is positioned and cannot
    79  	// optimize.
    80  	//
    81  	// Example: Bounds moving forward, and iterator exhausted in forward direction.
    82  	//      bounds = [f, h), ^ shows block iterator position
    83  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
    84  	//                                       ^
    85  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
    86  	//  set to +1. SeekGE(j) can use next (the optimization also requires that j
     87  	//  is within the block; that is not needed for correctness, but limits the
     88  	//  optimization to cases where it actually pays off).
    89  	//
    90  	// Example: Bounds moving forward.
    91  	//      bounds = [f, h), ^ shows block iterator position
    92  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
    93  	//                                 ^
    94  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
    95  	//  set to +1. SeekGE(j) can use next.
    96  	//
    97  	// Example: Bounds moving forward, but iterator not positioned using previous
    98  	//  bounds.
    99  	//      bounds = [f, h), ^ shows block iterator position
   100  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
   101  	//                                             ^
   102  	//  new bounds = [i, j). Iterator is at j since it was never positioned using
   103  	//  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
   104  	//  SeekGE(i) will not use next.
   105  	//
   106  	// Example: Bounds moving forward and sparse file
   107  	//      bounds = [f, h), ^ shows block iterator position
   108  	//  file contents [ a z ]
   109  	//                    ^
   110  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
   111  	//  set to +1. SeekGE(j) notices that the iterator is already past j and does
   112  	//  not need to do anything.
   113  	//
   114  	// Similar examples can be constructed for backward iteration.
   115  	//
   116  	// This notion of exactly one key before or after the bounds is not quite
   117  	// true when block properties are used to ignore blocks. In that case we
   118  	// can't stop precisely at the first block that is past the bounds since
   119  	// we are using the index entries to enforce the bounds.
   120  	//
   121  	// e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
   122  	// h, l. And let the lower bound be k, and we are reverse iterating. If
   123  	// the block [i, j, k] is ignored due to the block interval annotations we
   124  	// do need to move the index to block [f, g] since the index entry for the
   125  	// [i, j, k] block is l which is not less than the lower bound of k. So we
   126  	// have passed the entries i, j.
   127  	//
   128  	// This behavior is harmless since the block property filters are fixed
   129  	// for the lifetime of the iterator so i, j are irrelevant. In addition,
   130  	// the current code will not load the [f, g] block, so the seek
    131  	// optimization that attempts to use Next/Prev does not apply anyway; see the sketch below.
   132  	boundsCmp                   int
   133  	positionedUsingLatestBounds bool
   134  
   135  	// exhaustedBounds represents whether the iterator is exhausted for
   136  	// iteration by reaching the upper or lower bound. +1 when exhausted
   137  	// the upper bound, -1 when exhausted the lower bound, and 0 when
   138  	// neither. exhaustedBounds is also used for the TrySeekUsingNext
   139  	// optimization in twoLevelIterator and singleLevelIterator. Care should be
   140  	// taken in setting this in twoLevelIterator before calling into
   141  	// singleLevelIterator, given that these two iterators share this field.
   142  	exhaustedBounds int8
   143  
   144  	// maybeFilteredKeysSingleLevel indicates whether the last iterator
   145  	// positioning operation may have skipped any data blocks due to
   146  	// block-property filters when positioning the index.
   147  	maybeFilteredKeysSingleLevel bool
   148  
   149  	// useFilter specifies whether the filter block in this sstable, if present,
   150  	// should be used for prefix seeks or not. In some cases it is beneficial
   151  	// to skip a filter block even if it exists (eg. if probability of a match
   152  	// is high).
   153  	useFilter              bool
   154  	lastBloomFilterMatched bool
   155  
   156  	hideObsoletePoints bool
   157  }
   158  
   159  // singleLevelIterator implements the base.InternalIterator interface.
   160  var _ base.InternalIterator = (*singleLevelIterator)(nil)
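
// A minimal illustrative sketch of the boundsCmp bookkeeping described in the
// struct comment above (hypothetical helper; it is not used by the iterator
// and its parameter names are assumptions):
func sketchBoundsCmp(
	cmp Compare, oldLower, oldUpper, newLower, newUpper []byte, positionedUsingOldBounds bool,
) int {
	if !positionedUsingOldBounds {
		// Without a position established under the old bounds, nothing is
		// known about where the iterator sits relative to the new bounds.
		return 0
	}
	if oldUpper != nil && newLower != nil && cmp(oldUpper, newLower) <= 0 {
		// Bounds moved forward: the next SeekGE may try Next-ing within the
		// current block before falling back to a real seek.
		return +1
	}
	if oldLower != nil && newUpper != nil && cmp(newUpper, oldLower) <= 0 {
		// Bounds moved backward: the next SeekLT may try Prev-ing first.
		return -1
	}
	return 0
}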
   161  
   162  // init initializes a singleLevelIterator for reading from the table. It is
    163  // synonymous with Reader.NewIter, but allows for reuse of the iterator
   164  // between different Readers.
   165  //
    166  // Note that lower, upper passed into init have nothing to do with virtual sstable
   167  // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
   168  // will be enforced.
   169  func (i *singleLevelIterator) init(
   170  	ctx context.Context,
   171  	r *Reader,
   172  	v *virtualState,
   173  	lower, upper []byte,
   174  	filterer *BlockPropertiesFilterer,
   175  	useFilter, hideObsoletePoints bool,
   176  	stats *base.InternalIteratorStats,
   177  	rp ReaderProvider,
   178  	bufferPool *BufferPool,
   179  ) error {
   180  	if r.err != nil {
   181  		return r.err
   182  	}
   183  	indexH, err := r.readIndex(ctx, stats)
   184  	if err != nil {
   185  		return err
   186  	}
   187  	if v != nil {
   188  		i.vState = v
   189  		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
   190  	}
   191  
   192  	i.ctx = ctx
   193  	i.lower = lower
   194  	i.upper = upper
   195  	i.bpfs = filterer
   196  	i.useFilter = useFilter
   197  	i.reader = r
   198  	i.cmp = r.Compare
   199  	i.stats = stats
   200  	i.hideObsoletePoints = hideObsoletePoints
   201  	i.bufferPool = bufferPool
   202  	err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
   203  	if err != nil {
   204  		// blockIter.Close releases indexH and always returns a nil error
   205  		_ = i.index.Close()
   206  		return err
   207  	}
   208  	i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
   209  	if r.tableFormat >= TableFormatPebblev3 {
   210  		if r.Properties.NumValueBlocks > 0 {
   211  			// NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
    212  			// can outlive the singleLevelIterator due to being embedded in a
   213  			// LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
   214  			// should only optimize this if it shows up as significant in end-to-end
   215  			// CockroachDB benchmarks, since it is tricky to do so. One possibility
   216  			// is that if many sstable iterators only get positioned at latest
   217  			// versions of keys, and therefore never expose a LazyValue that is
   218  			// separated to their callers, they can put this valueBlockReader into a
   219  			// sync.Pool.
   220  			i.vbReader = &valueBlockReader{
   221  				ctx:    ctx,
   222  				bpOpen: i,
   223  				rp:     rp,
   224  				vbih:   r.valueBIH,
   225  				stats:  stats,
   226  			}
   227  			i.data.lazyValueHandling.vbr = i.vbReader
   228  			i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
   229  		}
   230  		i.data.lazyValueHandling.hasValuePrefix = true
   231  	}
   232  	return nil
   233  }
   234  
   235  // Helper function to check if keys returned from iterator are within global and virtual bounds.
   236  func (i *singleLevelIterator) maybeVerifyKey(
   237  	iKey *InternalKey, val base.LazyValue,
   238  ) (*InternalKey, base.LazyValue) {
    239  	// maybeVerifyKey is only used for virtual sstable iterators.
   240  	if invariants.Enabled && i.vState != nil && iKey != nil {
   241  		key := iKey.UserKey
   242  
   243  		uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
   244  		lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
   245  
   246  		if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
   247  			panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
   248  		}
   249  	}
   250  	return iKey, val
   251  }
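
// A minimal illustrative sketch of the bound predicate asserted above
// (hypothetical helper with assumed names; upper is treated as exclusive
// unless upperInclusive is set):
func keyWithinBounds(cmp Compare, key, lower, upper []byte, upperInclusive bool) bool {
	if lower != nil && cmp(key, lower) < 0 {
		return false // below the lower bound
	}
	if upper != nil {
		c := cmp(key, upper)
		if c > 0 || (c == 0 && !upperInclusive) {
			return false // at or beyond an exclusive upper bound
		}
	}
	return true
}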
   252  
   253  // setupForCompaction sets up the singleLevelIterator for use with compactionIter.
   254  // Currently, it skips readahead ramp-up. It should be called after init is called.
   255  func (i *singleLevelIterator) setupForCompaction() {
   256  	i.dataRH.SetupForCompaction()
   257  	if i.vbRH != nil {
   258  		i.vbRH.SetupForCompaction()
   259  	}
   260  }
   261  
   262  func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
   263  	return singleLevelIterator{
   264  		index: i.index.resetForReuse(),
   265  		data:  i.data.resetForReuse(),
   266  	}
   267  }
   268  
   269  func (i *singleLevelIterator) initBounds() {
   270  	// Trim the iteration bounds for the current block. We don't have to check
   271  	// the bounds on each iteration if the block is entirely contained within the
   272  	// iteration bounds.
   273  	i.blockLower = i.lower
   274  	if i.blockLower != nil {
   275  		key, _ := i.data.First()
   276  		if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
   277  			// The lower-bound is less than the first key in the block. No need
   278  			// to check the lower-bound again for this block.
   279  			i.blockLower = nil
   280  		}
   281  	}
   282  	i.blockUpper = i.upper
   283  	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
   284  		// The upper-bound is greater than the index key which itself is greater
   285  		// than or equal to every key in the block. No need to check the
   286  		// upper-bound again for this block. Even if blockUpper is inclusive
   287  		// because of upper being inclusive, we can still safely set blockUpper
   288  		// to nil here.
   289  		//
   290  		// TODO(bananabrick): We could also set blockUpper to nil for the >=
   291  		// case, if blockUpper is inclusive.
   292  		i.blockUpper = nil
   293  	}
   294  }
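
// A minimal illustrative sketch of the per-block bound trimming performed by
// initBounds above, shown for the lower bound only (hypothetical helper with
// assumed names):
func trimmedBlockLower(cmp Compare, lower, blockFirstKey []byte) []byte {
	if lower == nil {
		return nil // nothing to enforce
	}
	if blockFirstKey != nil && cmp(lower, blockFirstKey) < 0 {
		// Every key in the block is already >= lower, so the hot iteration
		// loop can skip per-key lower-bound comparisons for this block.
		return nil
	}
	return lower
}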
   295  
   296  // Deterministic disabling of the bounds-based optimization that avoids seeking.
   297  // Uses the iterator pointer, since we want diversity in iterator behavior for
   298  // the same SetBounds call. Used for tests.
   299  func disableBoundsOpt(bound []byte, ptr uintptr) bool {
   300  	// Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
   301  	simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
   302  	return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
   303  }
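
// A minimal illustrative sketch of the hash used by disableBoundsOpt above
// (hypothetical helper name): multiplying by ~2^64/phi spreads differences in
// the low pointer bits into the high bits, and >>63 keeps only the top bit,
// so iterator addresses split roughly evenly between 0 and 1. Combined with
// the parity test on the bound's last byte, about a quarter of (iterator,
// bound) pairs disable the optimization under invariants builds.
func fibonacciTopBit(ptr uintptr) uint64 {
	const fibMult = 11400714819323198485 // ~= 2^64 / golden ratio
	return (fibMult * uint64(ptr)) >> 63 // 0 or 1
}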
   304  
   305  // ensureBoundsOptDeterminism provides a facility for disabling of the bounds
   306  // optimizations performed by disableBoundsOpt for tests that require
   307  // deterministic iterator behavior. Some unit tests examine internal iterator
   308  // state and require this behavior to be deterministic.
   309  var ensureBoundsOptDeterminism bool
   310  
   311  // SetBounds implements internalIterator.SetBounds, as documented in the pebble
   312  // package. Note that the upper field is exclusive.
   313  func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
   314  	i.boundsCmp = 0
   315  	if i.vState != nil {
   316  		// If the reader is constructed for a virtual sstable, then we must
   317  		// constrain the bounds of the reader. For physical sstables, the bounds
   318  		// can be wider than the actual sstable's bounds because we won't
   319  		// accidentally expose additional keys as there are no additional keys.
   320  		i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
   321  			lower, upper, false,
   322  		)
   323  	} else {
   324  		// TODO(bananabrick): Figure out the logic here to enable the boundsCmp
   325  		// optimization for virtual sstables.
   326  		if i.positionedUsingLatestBounds {
   327  			if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
   328  				i.boundsCmp = +1
   329  				if invariants.Enabled && !ensureBoundsOptDeterminism &&
   330  					disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
   331  					i.boundsCmp = 0
   332  				}
   333  			} else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
   334  				i.boundsCmp = -1
   335  				if invariants.Enabled && !ensureBoundsOptDeterminism &&
   336  					disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
   337  					i.boundsCmp = 0
   338  				}
   339  			}
   340  		}
   341  	}
   342  
   343  	i.positionedUsingLatestBounds = false
   344  	i.lower = lower
   345  	i.upper = upper
   346  	i.blockLower = nil
   347  	i.blockUpper = nil
   348  }
   349  
   350  // loadBlock loads the block at the current index position and leaves i.data
   351  // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
   352  // may be nil if we have simply exhausted the entire table.
   353  func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
   354  	if !i.index.valid() {
   355  		// Ensure the data block iterator is invalidated even if loading of the block
   356  		// fails.
   357  		i.data.invalidate()
   358  		return loadBlockFailed
   359  	}
   360  	// Load the next block.
   361  	v := i.index.value()
   362  	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
   363  	if i.dataBH == bhp.BlockHandle && i.data.valid() {
   364  		// We're already at the data block we want to load. Reset bounds in case
   365  		// they changed since the last seek, but don't reload the block from cache
   366  		// or disk.
   367  		//
   368  		// It's safe to leave i.data in its original state here, as all callers to
   369  		// loadBlock make an absolute positioning call (i.e. a seek, first, or last)
   370  		// to `i.data` right after loadBlock returns loadBlockOK.
   371  		i.initBounds()
   372  		return loadBlockOK
   373  	}
   374  	// Ensure the data block iterator is invalidated even if loading of the block
   375  	// fails.
   376  	i.data.invalidate()
   377  	i.dataBH = bhp.BlockHandle
   378  	if err != nil {
   379  		i.err = errCorruptIndexEntry
   380  		return loadBlockFailed
   381  	}
   382  	if i.bpfs != nil {
   383  		intersects, err := i.bpfs.intersects(bhp.Props)
   384  		if err != nil {
   385  			i.err = errCorruptIndexEntry
   386  			return loadBlockFailed
   387  		}
   388  		if intersects == blockMaybeExcluded {
   389  			intersects = i.resolveMaybeExcluded(dir)
   390  		}
   391  		if intersects == blockExcluded {
   392  			i.maybeFilteredKeysSingleLevel = true
   393  			return loadBlockIrrelevant
   394  		}
   395  		// blockIntersects
   396  	}
   397  	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
   398  	block, err := i.reader.readBlock(ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, i.bufferPool)
   399  	if err != nil {
   400  		i.err = err
   401  		return loadBlockFailed
   402  	}
   403  	i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
   404  	if i.err != nil {
   405  		// The block is partially loaded, and we don't want it to appear valid.
   406  		i.data.invalidate()
   407  		return loadBlockFailed
   408  	}
   409  	i.initBounds()
   410  	return loadBlockOK
   411  }
   412  
   413  // readBlockForVBR implements the blockProviderWhenOpen interface for use by
   414  // the valueBlockReader.
   415  func (i *singleLevelIterator) readBlockForVBR(
   416  	ctx context.Context, h BlockHandle, stats *base.InternalIteratorStats,
   417  ) (bufferHandle, error) {
   418  	ctx = objiotracing.WithBlockType(ctx, objiotracing.ValueBlock)
   419  	return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, i.bufferPool)
   420  }
   421  
   422  // resolveMaybeExcluded is invoked when the block-property filterer has found
   423  // that a block is excluded according to its properties but only if its bounds
   424  // fall within the filter's current bounds.  This function consults the
    425  // appropriate bound, depending on the iteration direction, and returns either
    426  // `blockIntersects` or `blockExcluded`.
   427  func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
   428  	// TODO(jackson): We could first try comparing to top-level index block's
   429  	// key, and if within bounds avoid per-data block key comparisons.
   430  
   431  	// This iterator is configured with a bound-limited block property
   432  	// filter. The bpf determined this block could be excluded from
   433  	// iteration based on the property encoded in the block handle.
   434  	// However, we still need to determine if the block is wholly
   435  	// contained within the filter's key bounds.
   436  	//
   437  	// External guarantees ensure all the block's keys are ≥ the
   438  	// filter's lower bound during forward iteration, and that all the
   439  	// block's keys are < the filter's upper bound during backward
   440  	// iteration. We only need to determine if the opposite bound is
   441  	// also met.
   442  	//
   443  	// The index separator in index.Key() provides an inclusive
   444  	// upper-bound for the data block's keys, guaranteeing that all its
   445  	// keys are ≤ index.Key(). For forward iteration, this is all we
   446  	// need.
   447  	if dir > 0 {
   448  		// Forward iteration.
   449  		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
   450  			return blockExcluded
   451  		}
   452  		return blockIntersects
   453  	}
   454  
   455  	// Reverse iteration.
   456  	//
   457  	// Because we're iterating in the reverse direction, we don't yet have
   458  	// enough context available to determine if the block is wholly contained
   459  	// within its bounds. This case arises only during backward iteration,
   460  	// because of the way the index is structured.
   461  	//
   462  	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
   463  	// block with separator `c`. During reverse iteration, the guarantee that
   464  	// all the block's keys are < `d` is externally provided, but no guarantee
   465  	// is made on the bpf's lower bound. The separator `c` only provides an
   466  	// inclusive upper bound on the block's keys, indicating that the
   467  	// corresponding block handle points to a block containing only keys ≤ `c`.
   468  	//
   469  	// To establish a lower bound, we step the index backwards to read the
   470  	// previous block's separator, which provides an inclusive lower bound on
   471  	// the original block's keys. Afterwards, we step forward to restore our
   472  	// index position.
   473  	if peekKey, _ := i.index.Prev(); peekKey == nil {
   474  		// The original block points to the first block of this index block. If
   475  		// there's a two-level index, it could potentially provide a lower
   476  		// bound, but the code refactoring necessary to read it doesn't seem
   477  		// worth the payoff. We fall through to loading the block.
   478  	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
   479  		// The lower-bound on the original block falls within the filter's
   480  		// bounds, and we can skip the block (after restoring our current index
   481  		// position).
   482  		_, _ = i.index.Next()
   483  		return blockExcluded
   484  	}
   485  	_, _ = i.index.Next()
   486  	return blockIntersects
   487  }
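
// A minimal illustrative sketch of the reverse-iteration decision made above
// (hypothetical helper; the real code peeks at the index via Prev/Next rather
// than taking the previous separator as an argument):
func reverseBlockExcluded(prevSeparator []byte, keyIsWithinLowerBound func([]byte) bool) bool {
	if prevSeparator == nil {
		// The block is the first one under this index: there is no earlier
		// separator to establish a lower bound, so the block must be loaded.
		return false
	}
	// The previous index separator is an inclusive lower bound on the current
	// block's keys; if it is within the filter's lower bound, every key in
	// the block is too, and the block can be skipped.
	return keyIsWithinLowerBound(prevSeparator)
}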
   488  
   489  func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
   490  	if i.data.getFirstUserKey() == nil {
   491  		panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
   492  	}
   493  	i.blockLower = i.lower
   494  	if i.blockLower != nil {
   495  		firstUserKey := i.data.getFirstUserKey()
   496  		if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
   497  			// The lower-bound is less than the first key in the block. No need
   498  			// to check the lower-bound again for this block.
   499  			i.blockLower = nil
   500  		}
   501  	}
   502  	i.blockUpper = i.upper
   503  	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
   504  		// The upper-bound is greater than the index key which itself is greater
   505  		// than or equal to every key in the block. No need to check the
   506  		// upper-bound again for this block.
   507  		i.blockUpper = nil
   508  	}
   509  }
   510  
   511  // The number of times to call Next/Prev in a block before giving up and seeking.
   512  // The value of 4 is arbitrary.
   513  // TODO(sumeer): experiment with dynamic adjustment based on the history of
   514  // seeks for a particular iterator.
   515  const numStepsBeforeSeek = 4
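
// A minimal illustrative sketch of the step-then-seek pattern that
// numStepsBeforeSeek caps, shown on a sorted slice of strings (hypothetical
// helper; the iterator applies the same idea to block entries):
func stepThenSeek(sorted []string, start int, target string) int {
	pos := start
	for j := 0; j < numStepsBeforeSeek; j++ {
		if pos >= len(sorted) || sorted[pos] >= target {
			return pos // reached the target (or the end) by stepping
		}
		pos++
	}
	// Still behind the target after numStepsBeforeSeek steps: fall back to a
	// binary search over the remainder, the analogue of a real seek.
	lo, hi := pos, len(sorted)
	for lo < hi {
		mid := (lo + hi) / 2
		if sorted[mid] < target {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	return lo
}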
   516  
   517  func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
   518  	key []byte,
   519  ) (k *InternalKey, v base.LazyValue, done bool) {
   520  	k, v = i.data.Key(), i.data.value()
   521  	for j := 0; j < numStepsBeforeSeek; j++ {
   522  		curKeyCmp := i.cmp(k.UserKey, key)
   523  		if curKeyCmp >= 0 {
   524  			if i.blockUpper != nil {
   525  				cmp := i.cmp(k.UserKey, i.blockUpper)
   526  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   527  					i.exhaustedBounds = +1
   528  					return nil, base.LazyValue{}, true
   529  				}
   530  			}
   531  			return k, v, true
   532  		}
   533  		k, v = i.data.Next()
   534  		if k == nil {
   535  			break
   536  		}
   537  	}
   538  	return k, v, false
   539  }
   540  
   541  func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
   542  	key []byte,
   543  ) (k *InternalKey, v base.LazyValue, done bool) {
   544  	k, v = i.data.Key(), i.data.value()
   545  	for j := 0; j < numStepsBeforeSeek; j++ {
   546  		curKeyCmp := i.cmp(k.UserKey, key)
   547  		if curKeyCmp < 0 {
   548  			if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
   549  				i.exhaustedBounds = -1
   550  				return nil, base.LazyValue{}, true
   551  			}
   552  			return k, v, true
   553  		}
   554  		k, v = i.data.Prev()
   555  		if k == nil {
   556  			break
   557  		}
   558  	}
   559  	return k, v, false
   560  }
   561  
   562  func (i *singleLevelIterator) recordOffset() uint64 {
   563  	offset := i.dataBH.Offset
   564  	if i.data.valid() {
   565  		// - i.dataBH.Length/len(i.data.data) is the compression ratio. If
   566  		//   uncompressed, this is 1.
   567  		// - i.data.nextOffset is the uncompressed position of the current record
   568  		//   in the block.
   569  		// - i.dataBH.Offset is the offset of the block in the sstable before
   570  		//   decompression.
   571  		offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
   572  	} else {
   573  		// Last entry in the block must increment bytes iterated by the size of the block trailer
   574  		// and restart points.
   575  		offset += i.dataBH.Length + blockTrailerLen
   576  	}
   577  	return offset
   578  }
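
// A minimal illustrative sketch of the interpolation performed by
// recordOffset above (hypothetical helper with assumed names): progress
// within the uncompressed block is scaled by the block's compression ratio
// to attribute bytes in the sstable's (compressed) byte space.
func interpolatedOffset(blockOffset, compressedLen uint64, uncompressedPos, uncompressedLen int) uint64 {
	if uncompressedLen == 0 {
		return blockOffset
	}
	// compressedLen/uncompressedLen is the compression ratio (1 if the block
	// is stored uncompressed).
	return blockOffset + (uint64(uncompressedPos)*compressedLen)/uint64(uncompressedLen)
}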
   579  
   580  // SeekGE implements internalIterator.SeekGE, as documented in the pebble
   581  // package. Note that SeekGE only checks the upper bound. It is up to the
   582  // caller to ensure that key is greater than or equal to the lower bound.
   583  func (i *singleLevelIterator) SeekGE(
   584  	key []byte, flags base.SeekGEFlags,
   585  ) (*InternalKey, base.LazyValue) {
   586  	if i.vState != nil {
   587  		// Callers of SeekGE don't know about virtual sstable bounds, so we may
   588  		// have to internally restrict the bounds.
   589  		//
   590  		// TODO(bananabrick): We can optimize this check away for the level iter
   591  		// if necessary.
   592  		if i.cmp(key, i.lower) < 0 {
   593  			key = i.lower
   594  		}
   595  	}
   596  
   597  	if flags.TrySeekUsingNext() {
   598  		// The i.exhaustedBounds comparison indicates that the upper bound was
   599  		// reached. The i.data.isDataInvalidated() indicates that the sstable was
   600  		// exhausted.
   601  		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
   602  			// Already exhausted, so return nil.
   603  			return nil, base.LazyValue{}
   604  		}
   605  		if i.err != nil {
   606  			// The current iterator position cannot be used.
   607  			flags = flags.DisableTrySeekUsingNext()
   608  		}
   609  		// INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
    610  		// i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
    611  		// data-exhausted and bounds-exhausted, as defined earlier, are both
    612  		// false. This makes it safe to clear out i.exhaustedBounds and i.err
   613  		// before calling into seekGEHelper.
   614  	}
   615  
   616  	i.exhaustedBounds = 0
   617  	i.err = nil // clear cached iteration error
   618  	boundsCmp := i.boundsCmp
   619  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   620  	i.boundsCmp = 0
   621  	i.positionedUsingLatestBounds = true
   622  	return i.seekGEHelper(key, boundsCmp, flags)
   623  }
   624  
   625  // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
   626  func (i *singleLevelIterator) seekGEHelper(
   627  	key []byte, boundsCmp int, flags base.SeekGEFlags,
   628  ) (*InternalKey, base.LazyValue) {
   629  	// Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
   630  
    631  	// SeekGE performs various step-instead-of-seeking optimizations: e.g. those
    632  	// enabled by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
    633  	// Care must be taken to ensure that when these optimizations are applied and
    634  	// the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set appropriately.
   635  	// Consider a previous SeekGE that filtered keys from k until the current
   636  	// iterator position.
   637  	//
   638  	// If the previous SeekGE exhausted the iterator, it's possible keys greater
   639  	// than or equal to the current search key were filtered. We must not reuse
   640  	// the current iterator position without remembering the previous value of
   641  	// maybeFilteredKeys.
   642  
   643  	var dontSeekWithinBlock bool
   644  	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
   645  		boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
   646  		// Fast-path: The bounds have moved forward and this SeekGE is
   647  		// respecting the lower bound (guaranteed by Iterator). We know that
   648  		// the iterator must already be positioned within or just outside the
   649  		// previous bounds. Therefore it cannot be positioned at a block (or
   650  		// the position within that block) that is ahead of the seek position.
   651  		// However it can be positioned at an earlier block. This fast-path to
   652  		// use Next() on the block is only applied when we are already at the
   653  		// block that the slow-path (the else-clause) would load -- this is
   654  		// the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
   655  		// predicate.
   656  		i.initBoundsForAlreadyLoadedBlock()
   657  		ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
   658  		if done {
   659  			return ikey, val
   660  		}
   661  		if ikey == nil {
   662  			// Done with this block.
   663  			dontSeekWithinBlock = true
   664  		}
   665  	} else {
   666  		// Cannot use bounds monotonicity. But may be able to optimize if
   667  		// caller claimed externally known invariant represented by
   668  		// flags.TrySeekUsingNext().
   669  		if flags.TrySeekUsingNext() {
   670  			// seekPrefixGE or SeekGE has already ensured
   671  			// !i.data.isDataInvalidated() && i.exhaustedBounds != +1
   672  			currKey := i.data.Key()
   673  			value := i.data.value()
   674  			less := i.cmp(currKey.UserKey, key) < 0
   675  			// We could be more sophisticated and confirm that the seek
   676  			// position is within the current block before applying this
   677  			// optimization. But there may be some benefit even if it is in
   678  			// the next block, since we can avoid seeking i.index.
   679  			for j := 0; less && j < numStepsBeforeSeek; j++ {
   680  				currKey, value = i.Next()
   681  				if currKey == nil {
   682  					return nil, base.LazyValue{}
   683  				}
   684  				less = i.cmp(currKey.UserKey, key) < 0
   685  			}
   686  			if !less {
   687  				if i.blockUpper != nil {
   688  					cmp := i.cmp(currKey.UserKey, i.blockUpper)
   689  					if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   690  						i.exhaustedBounds = +1
   691  						return nil, base.LazyValue{}
   692  					}
   693  				}
   694  				return currKey, value
   695  			}
   696  		}
   697  
   698  		// Slow-path.
   699  		// Since we're re-seeking the iterator, the previous value of
   700  		// maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
   701  		// during seeking, loadBlock will set it to true.
   702  		i.maybeFilteredKeysSingleLevel = false
   703  
   704  		var ikey *InternalKey
   705  		if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
   706  			// The target key is greater than any key in the index block.
   707  			// Invalidate the block iterator so that a subsequent call to Prev()
   708  			// will return the last key in the table.
   709  			i.data.invalidate()
   710  			return nil, base.LazyValue{}
   711  		}
   712  		result := i.loadBlock(+1)
   713  		if result == loadBlockFailed {
   714  			return nil, base.LazyValue{}
   715  		}
   716  		if result == loadBlockIrrelevant {
    717  			// Enforce the upper bound here since we don't want to bother moving
    718  			// to the next block if the upper bound is already exceeded. Note that
   719  			// the next block starts with keys >= ikey.UserKey since even
   720  			// though this is the block separator, the same user key can span
   721  			// multiple blocks. If upper is exclusive we use >= below, else
   722  			// we use >.
   723  			if i.upper != nil {
   724  				cmp := i.cmp(ikey.UserKey, i.upper)
   725  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   726  					i.exhaustedBounds = +1
   727  					return nil, base.LazyValue{}
   728  				}
   729  			}
   730  			// Want to skip to the next block.
   731  			dontSeekWithinBlock = true
   732  		}
   733  	}
   734  	if !dontSeekWithinBlock {
   735  		if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
   736  			if i.blockUpper != nil {
   737  				cmp := i.cmp(ikey.UserKey, i.blockUpper)
   738  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   739  					i.exhaustedBounds = +1
   740  					return nil, base.LazyValue{}
   741  				}
   742  			}
   743  			return ikey, val
   744  		}
   745  	}
   746  	return i.skipForward()
   747  }
   748  
   749  // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
   750  // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
   751  // to the caller to ensure that key is greater than or equal to the lower bound.
   752  func (i *singleLevelIterator) SeekPrefixGE(
   753  	prefix, key []byte, flags base.SeekGEFlags,
   754  ) (*base.InternalKey, base.LazyValue) {
   755  	if i.vState != nil {
   756  		// Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
   757  		// we may have to internally restrict the bounds.
   758  		//
   759  		// TODO(bananabrick): We can optimize away this check for the level iter
   760  		// if necessary.
   761  		if i.cmp(key, i.lower) < 0 {
   762  			key = i.lower
   763  		}
   764  	}
   765  	return i.seekPrefixGE(prefix, key, flags, i.useFilter)
   766  }
   767  
   768  func (i *singleLevelIterator) seekPrefixGE(
   769  	prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
   770  ) (k *InternalKey, value base.LazyValue) {
   771  	// NOTE: prefix is only used for bloom filter checking and not later work in
   772  	// this method. Hence, we can use the existing iterator position if the last
   773  	// SeekPrefixGE did not fail bloom filter matching.
   774  
   775  	err := i.err
   776  	i.err = nil // clear cached iteration error
   777  	if checkFilter && i.reader.tableFilter != nil {
   778  		if !i.lastBloomFilterMatched {
   779  			// Iterator is not positioned based on last seek.
   780  			flags = flags.DisableTrySeekUsingNext()
   781  		}
   782  		i.lastBloomFilterMatched = false
   783  		// Check prefix bloom filter.
   784  		var dataH bufferHandle
   785  		dataH, i.err = i.reader.readFilter(i.ctx, i.stats)
   786  		if i.err != nil {
   787  			i.data.invalidate()
   788  			return nil, base.LazyValue{}
   789  		}
   790  		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
   791  		dataH.Release()
   792  		if !mayContain {
   793  			// This invalidation may not be necessary for correctness, and may
   794  			// be a place to optimize later by reusing the already loaded
   795  			// block. It was necessary in earlier versions of the code since
   796  			// the caller was allowed to call Next when SeekPrefixGE returned
   797  			// nil. This is no longer allowed.
   798  			i.data.invalidate()
   799  			return nil, base.LazyValue{}
   800  		}
   801  		i.lastBloomFilterMatched = true
   802  	}
   803  	if flags.TrySeekUsingNext() {
   804  		// The i.exhaustedBounds comparison indicates that the upper bound was
   805  		// reached. The i.data.isDataInvalidated() indicates that the sstable was
   806  		// exhausted.
   807  		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
   808  			// Already exhausted, so return nil.
   809  			return nil, base.LazyValue{}
   810  		}
   811  		if err != nil {
   812  			// The current iterator position cannot be used.
   813  			flags = flags.DisableTrySeekUsingNext()
   814  		}
   815  		// INVARIANT: flags.TrySeekUsingNext() => err == nil &&
    816  		// i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
    817  		// data-exhausted and bounds-exhausted, as defined earlier, are both
    818  		// false. This makes it safe to clear out i.exhaustedBounds and i.err
   819  		// before calling into seekGEHelper.
   820  	}
   821  	// Bloom filter matches, or skipped, so this method will position the
   822  	// iterator.
   823  	i.exhaustedBounds = 0
   824  	boundsCmp := i.boundsCmp
   825  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   826  	i.boundsCmp = 0
   827  	i.positionedUsingLatestBounds = true
   828  	k, value = i.seekGEHelper(key, boundsCmp, flags)
   829  	return i.maybeVerifyKey(k, value)
   830  }
   831  
    832  // virtualLast should only be called if i.vState != nil.
   833  func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
   834  	if i.vState == nil {
   835  		panic("pebble: invalid call to virtualLast")
   836  	}
   837  
    838  	// Seek to the first internal key with user key >= i.upper.
   839  	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
   840  	if i.endKeyInclusive {
   841  		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
   842  		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
   843  		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
   844  		// the last key with userkey == i.upper.
   845  		//
    846  		// TODO(bananabrick): Think about how to improve this. If there are many
    847  		// internal keys with the same user key at the upper bound, this could be slow, but
   848  		// maybe the odds of having many internal keys with the same user key at the
   849  		// upper bound are low.
   850  		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
   851  			ikey, _ = i.Next()
   852  		}
   853  		return i.Prev()
   854  	}
   855  
   856  	// We seeked to the first key >= i.upper.
   857  	return i.Prev()
   858  }
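
// A minimal illustrative sketch of the inclusive-upper-bound case handled by
// virtualLast above, on a sorted slice of user keys (hypothetical helper):
// position at the first key >= upper, skip past keys equal to the inclusive
// bound, then step back once.
func lastIndexAtOrBelow(sorted []string, upper string, upperInclusive bool) int {
	i := 0
	for i < len(sorted) && sorted[i] < upper { // SeekGE(upper) analogue
		i++
	}
	if upperInclusive {
		for i < len(sorted) && sorted[i] == upper { // Next past equal keys
			i++
		}
	}
	return i - 1 // Prev analogue; -1 means nothing is within the bound
}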
   859  
   860  // SeekLT implements internalIterator.SeekLT, as documented in the pebble
   861  // package. Note that SeekLT only checks the lower bound. It is up to the
   862  // caller to ensure that key is less than or equal to the upper bound.
   863  func (i *singleLevelIterator) SeekLT(
   864  	key []byte, flags base.SeekLTFlags,
   865  ) (*InternalKey, base.LazyValue) {
   866  	if i.vState != nil {
   867  		// Might have to fix upper bound since virtual sstable bounds are not
   868  		// known to callers of SeekLT.
   869  		//
   870  		// TODO(bananabrick): We can optimize away this check for the level iter
   871  		// if necessary.
   872  		cmp := i.cmp(key, i.upper)
   873  		// key == i.upper is fine. We'll do the right thing and return the
   874  		// first internal key with user key < key.
   875  		if cmp > 0 {
   876  			// Return the last key in the virtual sstable.
   877  			return i.virtualLast()
   878  		}
   879  	}
   880  
   881  	i.exhaustedBounds = 0
   882  	i.err = nil // clear cached iteration error
   883  	boundsCmp := i.boundsCmp
   884  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   885  	i.boundsCmp = 0
   886  
   887  	// Seeking operations perform various step-instead-of-seeking optimizations:
   888  	// eg by considering monotonically increasing bounds (i.boundsCmp). Care
   889  	// must be taken to ensure that when performing these optimizations and the
   890  	// iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
   891  	// appropriately.  Consider a previous SeekLT that filtered keys from k
   892  	// until the current iterator position.
   893  	//
    894  	// If the previous SeekLT exhausted the iterator, it's possible keys
   895  	// less than the current search key were filtered. We must not reuse the
   896  	// current iterator position without remembering the previous value of
   897  	// maybeFilteredKeysSingleLevel.
   898  
   899  	i.positionedUsingLatestBounds = true
   900  
   901  	var dontSeekWithinBlock bool
   902  	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
   903  		boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
   904  		// Fast-path: The bounds have moved backward, and this SeekLT is
   905  		// respecting the upper bound (guaranteed by Iterator). We know that
   906  		// the iterator must already be positioned within or just outside the
   907  		// previous bounds. Therefore it cannot be positioned at a block (or
   908  		// the position within that block) that is behind the seek position.
   909  		// However it can be positioned at a later block. This fast-path to
   910  		// use Prev() on the block is only applied when we are already at the
    911  		// block that can satisfy this seek -- this is the motivation for
    912  		// the i.cmp(i.data.getFirstUserKey(), key) < 0 predicate.
   913  		i.initBoundsForAlreadyLoadedBlock()
   914  		ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
   915  		if done {
   916  			return ikey, val
   917  		}
   918  		if ikey == nil {
   919  			// Done with this block.
   920  			dontSeekWithinBlock = true
   921  		}
   922  	} else {
   923  		// Slow-path.
   924  		i.maybeFilteredKeysSingleLevel = false
   925  		var ikey *InternalKey
   926  
   927  		// NB: If a bound-limited block property filter is configured, it's
   928  		// externally ensured that the filter is disabled (through returning
   929  		// Intersects=false irrespective of the block props provided) during
   930  		// seeks.
   931  		if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
   932  			ikey, _ = i.index.Last()
   933  			if ikey == nil {
   934  				return nil, base.LazyValue{}
   935  			}
   936  		}
   937  		// INVARIANT: ikey != nil.
   938  		result := i.loadBlock(-1)
   939  		if result == loadBlockFailed {
   940  			return nil, base.LazyValue{}
   941  		}
   942  		if result == loadBlockIrrelevant {
    943  			// Enforce the lower bound here since we don't want to bother moving
    944  			// to the previous block if the lower bound is already exceeded. Note
   945  			// that the previous block starts with keys <= ikey.UserKey since
   946  			// even though this is the current block's separator, the same
   947  			// user key can span multiple blocks.
   948  			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
   949  				i.exhaustedBounds = -1
   950  				return nil, base.LazyValue{}
   951  			}
   952  			// Want to skip to the previous block.
   953  			dontSeekWithinBlock = true
   954  		}
   955  	}
   956  	if !dontSeekWithinBlock {
   957  		if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
   958  			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
   959  				i.exhaustedBounds = -1
   960  				return nil, base.LazyValue{}
   961  			}
   962  			return ikey, val
   963  		}
   964  	}
   965  	// The index contains separator keys which may lie between
   966  	// user-keys. Consider the user-keys:
   967  	//
   968  	//   complete
   969  	// ---- new block ---
   970  	//   complexion
   971  	//
   972  	// If these two keys end one block and start the next, the index key may
   973  	// be chosen as "compleu". The SeekGE in the index block will then point
   974  	// us to the block containing "complexion". If this happens, we want the
   975  	// last key from the previous data block.
   976  	return i.maybeVerifyKey(i.skipBackward())
   977  }
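
// A minimal illustrative sketch of why an index key like "compleu" can sit
// between user keys, as in the comment above (hypothetical helper; this is
// not pebble's actual separator policy): given a < b, bump the first byte
// where they differ to produce a short key sep with a <= sep < b.
func sketchSeparator(a, b []byte) []byte {
	n := 0
	for n < len(a) && n < len(b) && a[n] == b[n] {
		n++ // length of the shared prefix
	}
	if n < len(a) && n < len(b) && a[n] < 0xff && a[n]+1 < b[n] {
		sep := append([]byte(nil), a[:n+1]...)
		sep[n]++ // e.g. sketchSeparator("complete", "complexion") == "compleu"
		return sep
	}
	// Fall back to a itself, which trivially satisfies a <= sep < b.
	return a
}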
   978  
   979  // First implements internalIterator.First, as documented in the pebble
   980  // package. Note that First only checks the upper bound. It is up to the caller
   981  // to ensure that key is greater than or equal to the lower bound (e.g. via a
   982  // call to SeekGE(lower)).
   983  func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
   984  	// If the iterator was created on a virtual sstable, we will SeekGE to the
   985  	// lower bound instead of using First, because First does not respect
   986  	// bounds.
   987  	if i.vState != nil {
   988  		return i.SeekGE(i.lower, base.SeekGEFlagsNone)
   989  	}
   990  
   991  	if i.lower != nil {
   992  		panic("singleLevelIterator.First() used despite lower bound")
   993  	}
   994  	i.positionedUsingLatestBounds = true
   995  	i.maybeFilteredKeysSingleLevel = false
   996  
   997  	return i.firstInternal()
   998  }
   999  
  1000  // firstInternal is a helper used for absolute positioning in a single-level
  1001  // index file, or for positioning in the second-level index in a two-level
  1002  // index file. For the latter, one cannot make any claims about absolute
  1003  // positioning.
  1004  func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
  1005  	i.exhaustedBounds = 0
  1006  	i.err = nil // clear cached iteration error
  1007  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1008  	i.boundsCmp = 0
  1009  
  1010  	var ikey *InternalKey
  1011  	if ikey, _ = i.index.First(); ikey == nil {
  1012  		i.data.invalidate()
  1013  		return nil, base.LazyValue{}
  1014  	}
  1015  	result := i.loadBlock(+1)
  1016  	if result == loadBlockFailed {
  1017  		return nil, base.LazyValue{}
  1018  	}
  1019  	if result == loadBlockOK {
  1020  		if ikey, val := i.data.First(); ikey != nil {
  1021  			if i.blockUpper != nil {
  1022  				cmp := i.cmp(ikey.UserKey, i.blockUpper)
  1023  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1024  					i.exhaustedBounds = +1
  1025  					return nil, base.LazyValue{}
  1026  				}
  1027  			}
  1028  			return ikey, val
  1029  		}
  1030  		// Else fall through to skipForward.
  1031  	} else {
   1032  		// result == loadBlockIrrelevant. Enforce the upper bound here since we
   1033  		// don't want to bother moving to the next block if the upper bound is
  1034  		// already exceeded. Note that the next block starts with keys >=
  1035  		// ikey.UserKey since even though this is the block separator, the
  1036  		// same user key can span multiple blocks. If upper is exclusive we
  1037  		// use >= below, else we use >.
  1038  		if i.upper != nil {
  1039  			cmp := i.cmp(ikey.UserKey, i.upper)
  1040  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1041  				i.exhaustedBounds = +1
  1042  				return nil, base.LazyValue{}
  1043  			}
  1044  		}
  1045  		// Else fall through to skipForward.
  1046  	}
  1047  
  1048  	return i.skipForward()
  1049  }
  1050  
  1051  // Last implements internalIterator.Last, as documented in the pebble
  1052  // package. Note that Last only checks the lower bound. It is up to the caller
  1053  // to ensure that key is less than the upper bound (e.g. via a call to
  1054  // SeekLT(upper))
  1055  func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
  1056  	if i.vState != nil {
  1057  		return i.virtualLast()
  1058  	}
  1059  
  1060  	if i.upper != nil {
  1061  		panic("singleLevelIterator.Last() used despite upper bound")
  1062  	}
  1063  	i.positionedUsingLatestBounds = true
  1064  	i.maybeFilteredKeysSingleLevel = false
  1065  	return i.lastInternal()
  1066  }
  1067  
  1068  // lastInternal is a helper used for absolute positioning in a single-level
  1069  // index file, or for positioning in the second-level index in a two-level
  1070  // index file. For the latter, one cannot make any claims about absolute
  1071  // positioning.
  1072  func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
  1073  	i.exhaustedBounds = 0
  1074  	i.err = nil // clear cached iteration error
  1075  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1076  	i.boundsCmp = 0
  1077  
  1078  	var ikey *InternalKey
  1079  	if ikey, _ = i.index.Last(); ikey == nil {
  1080  		i.data.invalidate()
  1081  		return nil, base.LazyValue{}
  1082  	}
  1083  	result := i.loadBlock(-1)
  1084  	if result == loadBlockFailed {
  1085  		return nil, base.LazyValue{}
  1086  	}
  1087  	if result == loadBlockOK {
  1088  		if ikey, val := i.data.Last(); ikey != nil {
  1089  			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
  1090  				i.exhaustedBounds = -1
  1091  				return nil, base.LazyValue{}
  1092  			}
  1093  			return ikey, val
  1094  		}
  1095  		// Else fall through to skipBackward.
  1096  	} else {
   1097  		// result == loadBlockIrrelevant. Enforce the lower bound here since we
   1098  		// don't want to bother moving to the previous block if the lower bound is
  1099  		// already exceeded. Note that the previous block starts with keys <=
  1100  		// key.UserKey since even though this is the current block's
  1101  		// separator, the same user key can span multiple blocks.
  1102  		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
  1103  			i.exhaustedBounds = -1
  1104  			return nil, base.LazyValue{}
  1105  		}
  1106  	}
  1107  
  1108  	return i.skipBackward()
  1109  }
  1110  
  1111  // Next implements internalIterator.Next, as documented in the pebble
  1112  // package.
  1113  // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
   1114  // for performance reasons. Keep the two in sync.
  1115  func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
  1116  	if i.exhaustedBounds == +1 {
  1117  		panic("Next called even though exhausted upper bound")
  1118  	}
  1119  	i.exhaustedBounds = 0
  1120  	i.maybeFilteredKeysSingleLevel = false
  1121  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1122  	i.boundsCmp = 0
  1123  
  1124  	if i.err != nil {
  1125  		return nil, base.LazyValue{}
  1126  	}
  1127  	if key, val := i.data.Next(); key != nil {
  1128  		if i.blockUpper != nil {
  1129  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1130  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1131  				i.exhaustedBounds = +1
  1132  				return nil, base.LazyValue{}
  1133  			}
  1134  		}
  1135  		return key, val
  1136  	}
  1137  	return i.skipForward()
  1138  }
  1139  
  1140  // NextPrefix implements (base.InternalIterator).NextPrefix.
  1141  func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
  1142  	if i.exhaustedBounds == +1 {
  1143  		panic("NextPrefix called even though exhausted upper bound")
  1144  	}
  1145  	i.exhaustedBounds = 0
  1146  	i.maybeFilteredKeysSingleLevel = false
  1147  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1148  	i.boundsCmp = 0
  1149  	if i.err != nil {
  1150  		return nil, base.LazyValue{}
  1151  	}
  1152  	if key, val := i.data.NextPrefix(succKey); key != nil {
  1153  		if i.blockUpper != nil {
  1154  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1155  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1156  				i.exhaustedBounds = +1
  1157  				return nil, base.LazyValue{}
  1158  			}
  1159  		}
  1160  		return key, val
  1161  	}
  1162  	// Did not find prefix in the existing data block. This is the slow-path
  1163  	// where we effectively seek the iterator.
  1164  	var ikey *InternalKey
  1165  	// The key is likely to be in the next data block, so try one step.
  1166  	if ikey, _ = i.index.Next(); ikey == nil {
  1167  		// The target key is greater than any key in the index block.
  1168  		// Invalidate the block iterator so that a subsequent call to Prev()
  1169  		// will return the last key in the table.
  1170  		i.data.invalidate()
  1171  		return nil, base.LazyValue{}
  1172  	}
  1173  	if i.cmp(succKey, ikey.UserKey) > 0 {
  1174  		// Not in the next data block, so seek the index.
  1175  		if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
  1176  			// The target key is greater than any key in the index block.
  1177  			// Invalidate the block iterator so that a subsequent call to Prev()
  1178  			// will return the last key in the table.
  1179  			i.data.invalidate()
  1180  			return nil, base.LazyValue{}
  1181  		}
  1182  	}
  1183  	result := i.loadBlock(+1)
  1184  	if result == loadBlockFailed {
  1185  		return nil, base.LazyValue{}
  1186  	}
  1187  	if result == loadBlockIrrelevant {
   1188  		// Enforce the upper bound here since we don't want to bother moving
   1189  		// to the next block if the upper bound is already exceeded. Note that
  1190  		// the next block starts with keys >= ikey.UserKey since even
  1191  		// though this is the block separator, the same user key can span
  1192  		// multiple blocks. If upper is exclusive we use >= below, else we use
  1193  		// >.
  1194  		if i.upper != nil {
  1195  			cmp := i.cmp(ikey.UserKey, i.upper)
  1196  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1197  				i.exhaustedBounds = +1
  1198  				return nil, base.LazyValue{}
  1199  			}
  1200  		}
  1201  	} else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
  1202  		if i.blockUpper != nil {
  1203  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1204  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1205  				i.exhaustedBounds = +1
  1206  				return nil, base.LazyValue{}
  1207  			}
  1208  		}
  1209  		return i.maybeVerifyKey(key, val)
  1210  	}
  1211  
  1212  	return i.skipForward()
  1213  }
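
// A minimal illustrative sketch of the slow path above (hypothetical helper
// on a sorted slice of block separators): first try the adjacent block with a
// single step, and only if the successor key lies beyond its separator fall
// back to a full index seek.
func nextBlockForSuccessor(seps []string, cur int, succ string) int {
	if cur+1 < len(seps) && succ <= seps[cur+1] {
		return cur + 1 // the successor is covered by the very next block
	}
	// Binary search for the first separator >= succ, the index.SeekGE analogue.
	lo, hi := cur+1, len(seps)
	for lo < hi {
		mid := (lo + hi) / 2
		if seps[mid] < succ {
			lo = mid + 1
		} else {
			hi = mid
		}
	}
	return lo // len(seps) means the successor is past the last block
}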
  1214  
  1215  // Prev implements internalIterator.Prev, as documented in the pebble
  1216  // package.
  1217  func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
  1218  	if i.exhaustedBounds == -1 {
  1219  		panic("Prev called even though exhausted lower bound")
  1220  	}
  1221  	i.exhaustedBounds = 0
  1222  	i.maybeFilteredKeysSingleLevel = false
  1223  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1224  	i.boundsCmp = 0
  1225  
  1226  	if i.err != nil {
  1227  		return nil, base.LazyValue{}
  1228  	}
  1229  	if key, val := i.data.Prev(); key != nil {
  1230  		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
  1231  			i.exhaustedBounds = -1
  1232  			return nil, base.LazyValue{}
  1233  		}
  1234  		return key, val
  1235  	}
  1236  	return i.skipBackward()
  1237  }
  1238  
  1239  func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
  1240  	for {
  1241  		var key *InternalKey
  1242  		if key, _ = i.index.Next(); key == nil {
  1243  			i.data.invalidate()
  1244  			break
  1245  		}
  1246  		result := i.loadBlock(+1)
  1247  		if result != loadBlockOK {
  1248  			if i.err != nil {
  1249  				break
  1250  			}
  1251  			if result == loadBlockFailed {
  1252  				// We checked that i.index was at a valid entry, so
   1253  				// loadBlockFailed could not have happened due to i.index
  1254  				// being exhausted, and must be due to an error.
  1255  				panic("loadBlock should not have failed with no error")
  1256  			}
  1257  			// result == loadBlockIrrelevant. Enforce the upper bound here
   1258  			// since we don't want to bother moving to the next block if the upper
  1259  			// bound is already exceeded. Note that the next block starts with
  1260  			// keys >= key.UserKey since even though this is the block
  1261  			// separator, the same user key can span multiple blocks. If upper
  1262  			// is exclusive we use >= below, else we use >.
  1263  			if i.upper != nil {
  1264  				cmp := i.cmp(key.UserKey, i.upper)
  1265  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1266  					i.exhaustedBounds = +1
  1267  					return nil, base.LazyValue{}
  1268  				}
  1269  			}
  1270  			continue
  1271  		}
  1272  		if key, val := i.data.First(); key != nil {
  1273  			if i.blockUpper != nil {
  1274  				cmp := i.cmp(key.UserKey, i.blockUpper)
  1275  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1276  					i.exhaustedBounds = +1
  1277  					return nil, base.LazyValue{}
  1278  				}
  1279  			}
  1280  			return i.maybeVerifyKey(key, val)
  1281  		}
  1282  	}
  1283  	return nil, base.LazyValue{}
  1284  }
  1285  
  1286  func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
  1287  	for {
  1288  		var key *InternalKey
  1289  		if key, _ = i.index.Prev(); key == nil {
  1290  			i.data.invalidate()
  1291  			break
  1292  		}
  1293  		result := i.loadBlock(-1)
  1294  		if result != loadBlockOK {
  1295  			if i.err != nil {
  1296  				break
  1297  			}
  1298  			if result == loadBlockFailed {
  1299  				// We checked that i.index was at a valid entry, so
   1300  				// loadBlockFailed could not have happened due to i.index
  1301  				// being exhausted, and must be due to an error.
  1302  				panic("loadBlock should not have failed with no error")
  1303  			}
  1304  			// result == loadBlockIrrelevant. Enforce the lower bound here
   1305  			// since we don't want to bother moving to the previous block if the lower
  1306  			// bound is already exceeded. Note that the previous block starts with
  1307  			// keys <= key.UserKey since even though this is the current block's
  1308  			// separator, the same user key can span multiple blocks.
  1309  			if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
  1310  				i.exhaustedBounds = -1
  1311  				return nil, base.LazyValue{}
  1312  			}
  1313  			continue
  1314  		}
  1315  		key, val := i.data.Last()
  1316  		if key == nil {
  1317  			return nil, base.LazyValue{}
  1318  		}
  1319  		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
  1320  			i.exhaustedBounds = -1
  1321  			return nil, base.LazyValue{}
  1322  		}
  1323  		return i.maybeVerifyKey(key, val)
  1324  	}
  1325  	return nil, base.LazyValue{}
  1326  }
  1327  
  1328  // Error implements internalIterator.Error, as documented in the pebble
  1329  // package.
  1330  func (i *singleLevelIterator) Error() error {
  1331  	if err := i.data.Error(); err != nil {
  1332  		return err
  1333  	}
  1334  	return i.err
  1335  }
  1336  
  1337  // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
  1338  // whether or not the last positioning method may have skipped any keys due to
  1339  // block-property filters.
  1340  func (i *singleLevelIterator) MaybeFilteredKeys() bool {
  1341  	return i.maybeFilteredKeysSingleLevel
  1342  }
  1343  
  1344  // SetCloseHook sets a function that will be called when the iterator is
  1345  // closed.
  1346  func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
  1347  	i.closeHook = fn
  1348  }
  1349  
  1350  func firstError(err0, err1 error) error {
  1351  	if err0 != nil {
  1352  		return err0
  1353  	}
  1354  	return err1
  1355  }
  1356  
  1357  // Close implements internalIterator.Close, as documented in the pebble
  1358  // package.
  1359  func (i *singleLevelIterator) Close() error {
  1360  	var err error
  1361  	if i.closeHook != nil {
  1362  		err = firstError(err, i.closeHook(i))
  1363  	}
  1364  	err = firstError(err, i.data.Close())
  1365  	err = firstError(err, i.index.Close())
  1366  	if i.dataRH != nil {
  1367  		err = firstError(err, i.dataRH.Close())
  1368  		i.dataRH = nil
  1369  	}
  1370  	err = firstError(err, i.err)
  1371  	if i.bpfs != nil {
  1372  		releaseBlockPropertiesFilterer(i.bpfs)
  1373  	}
  1374  	if i.vbReader != nil {
  1375  		i.vbReader.close()
  1376  	}
  1377  	if i.vbRH != nil {
  1378  		err = firstError(err, i.vbRH.Close())
  1379  		i.vbRH = nil
  1380  	}
  1381  	*i = i.resetForReuse()
  1382  	singleLevelIterPool.Put(i)
  1383  	return err
  1384  }
  1385  
  1386  func (i *singleLevelIterator) String() string {
  1387  	if i.vState != nil {
  1388  		return i.vState.fileNum.String()
  1389  	}
  1390  	return i.reader.fileNum.String()
  1391  }