github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/reader_iter_single_lvl.go

     1  // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"unsafe"
    11  
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/cockroachdb/pebble/internal/invariants"
    14  	"github.com/cockroachdb/pebble/objstorage"
    15  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    16  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
    17  )
    18  
    19  // singleLevelIterator iterates over an entire table of data. To seek for a given
    20  // key, it first looks in the index for the block that contains that key, and then
    21  // looks inside that block.
    22  type singleLevelIterator struct {
    23  	ctx context.Context
    24  	cmp Compare
    25  	// Global lower/upper bound for the iterator.
    26  	lower []byte
    27  	upper []byte
    28  	bpfs  *BlockPropertiesFilterer
    29  	// Per-block lower/upper bound. Nil if the bound does not apply to the block
    30  	// because we determined the block lies completely within the bound.
    31  	blockLower []byte
    32  	blockUpper []byte
    33  	reader     *Reader
    34  	// vState will be set iff the iterator is constructed for virtual sstable
    35  	// iteration.
    36  	vState *virtualState
    37  	// endKeyInclusive is set to force the iterator to treat the upper field as
    38  	// inclusive while iterating instead of exclusive.
    39  	endKeyInclusive bool
    40  	index           blockIter
    41  	data            blockIter
    42  	dataRH          objstorage.ReadHandle
    43  	dataRHPrealloc  objstorageprovider.PreallocatedReadHandle
    44  	// dataBH refers to the last data block that the iterator considered
    45  	// loading. It may not actually have loaded the block, due to an error or
    46  	// because it was considered irrelevant.
    47  	dataBH   BlockHandle
    48  	vbReader *valueBlockReader
    49  	// vbRH is the read handle for value blocks, which are in a different
    50  	// part of the sstable than data blocks.
    51  	vbRH         objstorage.ReadHandle
    52  	vbRHPrealloc objstorageprovider.PreallocatedReadHandle
    53  	err          error
    54  	closeHook    func(i Iterator) error
    55  	// stats and iterStats are slightly different. stats is a shared struct
    56  	// supplied from the outside, and represents stats for the whole iterator
    57  	// tree and can be reset from the outside (e.g. when the pebble.Iterator is
    58  	// being reused). It is currently only provided when the iterator tree is
    59  	// rooted at pebble.Iterator. iterStats is this sstable iterator's private
    60  	// stats that are reported to a CategoryStatsCollector when this iterator is
     61  	// closed. More paths are instrumented with iterStats since the
     62  	// CategoryStatsCollector needed for it is provided by the
     63  	// tableCacheContainer (which is more universally used).
    64  	stats      *base.InternalIteratorStats
    65  	iterStats  iterStatsAccumulator
    66  	bufferPool *BufferPool
    67  
    68  	// boundsCmp and positionedUsingLatestBounds are for optimizing iteration
    69  	// that uses multiple adjacent bounds. The seek after setting a new bound
    70  	// can use the fact that the iterator is either within the previous bounds
     71  	// or exactly one key before or after the bounds. If the new bounds are
    72  	// after/before the previous bounds, and we are already positioned at a
    73  	// block that is relevant for the new bounds, we can try to first position
    74  	// using Next/Prev (repeatedly) instead of doing a more expensive seek.
    75  	//
    76  	// When there are wide files at higher levels that match the bounds
    77  	// but don't have any data for the bound, we will already be
    78  	// positioned at the key beyond the bounds and won't need to do much
    79  	// work -- given that most data is in L6, such files are likely to
    80  	// dominate the performance of the mergingIter, and may be the main
    81  	// benefit of this performance optimization (of course it also helps
    82  	// when the file that has the data has successive seeks that stay in
    83  	// the same block).
    84  	//
    85  	// Specifically, boundsCmp captures the relationship between the previous
    86  	// and current bounds, if the iterator had been positioned after setting
    87  	// the previous bounds. If it was not positioned, i.e., Seek/First/Last
    88  	// were not called, we don't know where it is positioned and cannot
    89  	// optimize.
    90  	//
    91  	// Example: Bounds moving forward, and iterator exhausted in forward direction.
    92  	//      bounds = [f, h), ^ shows block iterator position
    93  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
    94  	//                                       ^
    95  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
     96  	//  set to +1. SeekGE(j) can use next (the optimization also requires that j
     97  	//  is within the block, but that requirement is not for correctness; it only
     98  	//  limits the optimization to cases where it will actually help).
    99  	//
   100  	// Example: Bounds moving forward.
   101  	//      bounds = [f, h), ^ shows block iterator position
   102  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
   103  	//                                 ^
   104  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
   105  	//  set to +1. SeekGE(j) can use next.
   106  	//
   107  	// Example: Bounds moving forward, but iterator not positioned using previous
   108  	//  bounds.
   109  	//      bounds = [f, h), ^ shows block iterator position
   110  	//  file contents [ a  b  c  d  e  f  g  h  i  j  k ]
   111  	//                                             ^
   112  	//  new bounds = [i, j). Iterator is at j since it was never positioned using
   113  	//  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
   114  	//  SeekGE(i) will not use next.
   115  	//
   116  	// Example: Bounds moving forward and sparse file
   117  	//      bounds = [f, h), ^ shows block iterator position
   118  	//  file contents [ a z ]
   119  	//                    ^
   120  	//  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
   121  	//  set to +1. SeekGE(j) notices that the iterator is already past j and does
   122  	//  not need to do anything.
   123  	//
   124  	// Similar examples can be constructed for backward iteration.
   125  	//
   126  	// This notion of exactly one key before or after the bounds is not quite
   127  	// true when block properties are used to ignore blocks. In that case we
   128  	// can't stop precisely at the first block that is past the bounds since
   129  	// we are using the index entries to enforce the bounds.
   130  	//
   131  	// e.g. 3 blocks with keys [b, c]  [f, g], [i, j, k] with index entries d,
   132  	// h, l. And let the lower bound be k, and we are reverse iterating. If
   133  	// the block [i, j, k] is ignored due to the block interval annotations we
   134  	// do need to move the index to block [f, g] since the index entry for the
   135  	// [i, j, k] block is l which is not less than the lower bound of k. So we
   136  	// have passed the entries i, j.
   137  	//
   138  	// This behavior is harmless since the block property filters are fixed
   139  	// for the lifetime of the iterator so i, j are irrelevant. In addition,
   140  	// the current code will not load the [f, g] block, so the seek
   141  	// optimization that attempts to use Next/Prev do not apply anyway.
   142  	boundsCmp                   int
   143  	positionedUsingLatestBounds bool
   144  
   145  	// exhaustedBounds represents whether the iterator is exhausted for
   146  	// iteration by reaching the upper or lower bound. +1 when exhausted
   147  	// the upper bound, -1 when exhausted the lower bound, and 0 when
   148  	// neither. exhaustedBounds is also used for the TrySeekUsingNext
   149  	// optimization in twoLevelIterator and singleLevelIterator. Care should be
   150  	// taken in setting this in twoLevelIterator before calling into
   151  	// singleLevelIterator, given that these two iterators share this field.
   152  	exhaustedBounds int8
   153  
   154  	// maybeFilteredKeysSingleLevel indicates whether the last iterator
   155  	// positioning operation may have skipped any data blocks due to
   156  	// block-property filters when positioning the index.
   157  	maybeFilteredKeysSingleLevel bool
   158  
   159  	// useFilter specifies whether the filter block in this sstable, if present,
   160  	// should be used for prefix seeks or not. In some cases it is beneficial
   161  	// to skip a filter block even if it exists (eg. if probability of a match
   162  	// is high).
   163  	useFilter              bool
   164  	lastBloomFilterMatched bool
   165  
   166  	hideObsoletePoints bool
   167  }
   168  
   169  // singleLevelIterator implements the base.InternalIterator interface.
   170  var _ base.InternalIterator = (*singleLevelIterator)(nil)
   171  
   172  // init initializes a singleLevelIterator for reading from the table. It is
    173  // synonymous with Reader.NewIter, but allows for reuse of the iterator
    174  // between different Readers.
   175  //
   176  // Note that lower, upper passed into init has nothing to do with virtual sstable
   177  // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
   178  // will be enforced.
   179  func (i *singleLevelIterator) init(
   180  	ctx context.Context,
   181  	r *Reader,
   182  	v *virtualState,
   183  	lower, upper []byte,
   184  	filterer *BlockPropertiesFilterer,
   185  	useFilter, hideObsoletePoints bool,
   186  	stats *base.InternalIteratorStats,
   187  	categoryAndQoS CategoryAndQoS,
   188  	statsCollector *CategoryStatsCollector,
   189  	rp ReaderProvider,
   190  	bufferPool *BufferPool,
   191  ) error {
   192  	if r.err != nil {
   193  		return r.err
   194  	}
   195  	i.iterStats.init(categoryAndQoS, statsCollector)
   196  	indexH, err := r.readIndex(ctx, stats, &i.iterStats)
   197  	if err != nil {
   198  		return err
   199  	}
   200  	if v != nil {
   201  		i.vState = v
   202  		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
   203  	}
   204  
   205  	i.ctx = ctx
   206  	i.lower = lower
   207  	i.upper = upper
   208  	i.bpfs = filterer
   209  	i.useFilter = useFilter
   210  	i.reader = r
   211  	i.cmp = r.Compare
   212  	i.stats = stats
   213  	i.hideObsoletePoints = hideObsoletePoints
   214  	i.bufferPool = bufferPool
   215  	err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
   216  	if err != nil {
   217  		// blockIter.Close releases indexH and always returns a nil error
   218  		_ = i.index.Close()
   219  		return err
   220  	}
   221  	i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
   222  	if r.tableFormat >= TableFormatPebblev3 {
   223  		if r.Properties.NumValueBlocks > 0 {
   224  			// NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
    225  			// can outlive the singleLevelIterator due to being embedded in a
   226  			// LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
   227  			// should only optimize this if it shows up as significant in end-to-end
   228  			// CockroachDB benchmarks, since it is tricky to do so. One possibility
   229  			// is that if many sstable iterators only get positioned at latest
   230  			// versions of keys, and therefore never expose a LazyValue that is
   231  			// separated to their callers, they can put this valueBlockReader into a
   232  			// sync.Pool.
   233  			i.vbReader = &valueBlockReader{
   234  				bpOpen: i,
   235  				rp:     rp,
   236  				vbih:   r.valueBIH,
   237  				stats:  stats,
   238  			}
   239  			i.data.lazyValueHandling.vbr = i.vbReader
   240  			i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
   241  		}
   242  		i.data.lazyValueHandling.hasValuePrefix = true
   243  	}
   244  	return nil
   245  }
   246  
    247  // Helper function to check if keys returned from the iterator are within global and virtual bounds.
   248  func (i *singleLevelIterator) maybeVerifyKey(
   249  	iKey *InternalKey, val base.LazyValue,
   250  ) (*InternalKey, base.LazyValue) {
    251  	// maybeVerifyKey is only used for virtual sstable iterators.
   252  	if invariants.Enabled && i.vState != nil && iKey != nil {
   253  		key := iKey.UserKey
   254  
   255  		uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
   256  		lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)
   257  
   258  		if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
   259  			panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
   260  		}
   261  	}
   262  	return iKey, val
   263  }
   264  
   265  // setupForCompaction sets up the singleLevelIterator for use with compactionIter.
   266  // Currently, it skips readahead ramp-up. It should be called after init is called.
   267  func (i *singleLevelIterator) setupForCompaction() {
   268  	i.dataRH.SetupForCompaction()
   269  	if i.vbRH != nil {
   270  		i.vbRH.SetupForCompaction()
   271  	}
   272  }
   273  
   274  func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
   275  	return singleLevelIterator{
   276  		index: i.index.resetForReuse(),
   277  		data:  i.data.resetForReuse(),
   278  	}
   279  }
   280  
   281  func (i *singleLevelIterator) initBounds() {
   282  	// Trim the iteration bounds for the current block. We don't have to check
   283  	// the bounds on each iteration if the block is entirely contained within the
   284  	// iteration bounds.
   285  	i.blockLower = i.lower
   286  	if i.blockLower != nil {
   287  		key, _ := i.data.First()
   288  		if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
   289  			// The lower-bound is less than the first key in the block. No need
   290  			// to check the lower-bound again for this block.
   291  			i.blockLower = nil
   292  		}
   293  	}
   294  	i.blockUpper = i.upper
   295  	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
   296  		// The upper-bound is greater than the index key which itself is greater
   297  		// than or equal to every key in the block. No need to check the
   298  		// upper-bound again for this block. Even if blockUpper is inclusive
   299  		// because of upper being inclusive, we can still safely set blockUpper
   300  		// to nil here.
   301  		//
   302  		// TODO(bananabrick): We could also set blockUpper to nil for the >=
   303  		// case, if blockUpper is inclusive.
   304  		i.blockUpper = nil
   305  	}
   306  }
   307  
   308  // Deterministic disabling of the bounds-based optimization that avoids seeking.
   309  // Uses the iterator pointer, since we want diversity in iterator behavior for
   310  // the same SetBounds call. Used for tests.
   311  func disableBoundsOpt(bound []byte, ptr uintptr) bool {
   312  	// Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
   313  	simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
   314  	return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
   315  }
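
// The following is a standalone illustrative sketch (not used by pebble) of the
// Fibonacci-hashing coin flip above: multiplying the iterator pointer by
// ~2^64/phi scrambles its bits, and keeping only the topmost bit yields a
// roughly fair, deterministic coin per iterator. disableBoundsOpt combines this
// coin with the parity of the bound's last byte, so about a quarter of
// (iterator, bound) combinations end up with the optimization disabled in
// invariants builds.
func fibonacciCoinSketch(ptr uintptr) bool {
	const fibonacciMultiplier = 11400714819323198485 // ~2^64 / golden ratio
	return (fibonacciMultiplier*uint64(ptr))>>63 == 0
}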
   316  
   317  // ensureBoundsOptDeterminism provides a facility for disabling of the bounds
   318  // optimizations performed by disableBoundsOpt for tests that require
   319  // deterministic iterator behavior. Some unit tests examine internal iterator
   320  // state and require this behavior to be deterministic.
   321  var ensureBoundsOptDeterminism bool
   322  
   323  // SetBounds implements internalIterator.SetBounds, as documented in the pebble
   324  // package. Note that the upper field is exclusive.
   325  func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
   326  	i.boundsCmp = 0
   327  	if i.vState != nil {
   328  		// If the reader is constructed for a virtual sstable, then we must
   329  		// constrain the bounds of the reader. For physical sstables, the bounds
   330  		// can be wider than the actual sstable's bounds because we won't
   331  		// accidentally expose additional keys as there are no additional keys.
   332  		i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
   333  			lower, upper, false,
   334  		)
   335  	} else {
   336  		// TODO(bananabrick): Figure out the logic here to enable the boundsCmp
   337  		// optimization for virtual sstables.
   338  		if i.positionedUsingLatestBounds {
   339  			if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
   340  				i.boundsCmp = +1
   341  				if invariants.Enabled && !ensureBoundsOptDeterminism &&
   342  					disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
   343  					i.boundsCmp = 0
   344  				}
   345  			} else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
   346  				i.boundsCmp = -1
   347  				if invariants.Enabled && !ensureBoundsOptDeterminism &&
   348  					disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
   349  					i.boundsCmp = 0
   350  				}
   351  			}
   352  		}
   353  	}
   354  
   355  	i.positionedUsingLatestBounds = false
   356  	i.lower = lower
   357  	i.upper = upper
   358  	i.blockLower = nil
   359  	i.blockUpper = nil
   360  }
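
// A rough standalone sketch (hypothetical helper, not part of pebble) of the
// boundsCmp bookkeeping performed above: assuming the iterator was positioned
// using the previous bounds, a new bound interval lying entirely at or beyond
// the old upper bound yields +1, one lying entirely at or before the old lower
// bound yields -1, and anything else yields 0 (no Next/Prev-based seek
// optimization).
func boundsCmpSketch(cmp Compare, oldLower, oldUpper, newLower, newUpper []byte) int {
	switch {
	case oldUpper != nil && newLower != nil && cmp(oldUpper, newLower) <= 0:
		return +1 // bounds moved forward
	case oldLower != nil && newUpper != nil && cmp(newUpper, oldLower) <= 0:
		return -1 // bounds moved backward
	default:
		return 0
	}
}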
   361  
   362  func (i *singleLevelIterator) SetContext(ctx context.Context) {
   363  	i.ctx = ctx
   364  }
   365  
   366  // loadBlock loads the block at the current index position and leaves i.data
   367  // unpositioned. If unsuccessful, it sets i.err to any error encountered, which
   368  // may be nil if we have simply exhausted the entire table.
   369  func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
   370  	if !i.index.valid() {
   371  		// Ensure the data block iterator is invalidated even if loading of the block
   372  		// fails.
   373  		i.data.invalidate()
   374  		return loadBlockFailed
   375  	}
   376  	// Load the next block.
   377  	v := i.index.value()
   378  	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
   379  	if i.dataBH == bhp.BlockHandle && i.data.valid() {
   380  		// We're already at the data block we want to load. Reset bounds in case
   381  		// they changed since the last seek, but don't reload the block from cache
   382  		// or disk.
   383  		//
   384  		// It's safe to leave i.data in its original state here, as all callers to
   385  		// loadBlock make an absolute positioning call (i.e. a seek, first, or last)
   386  		// to `i.data` right after loadBlock returns loadBlockOK.
   387  		i.initBounds()
   388  		return loadBlockOK
   389  	}
   390  	// Ensure the data block iterator is invalidated even if loading of the block
   391  	// fails.
   392  	i.data.invalidate()
   393  	i.dataBH = bhp.BlockHandle
   394  	if err != nil {
   395  		i.err = errCorruptIndexEntry
   396  		return loadBlockFailed
   397  	}
   398  	if i.bpfs != nil {
   399  		intersects, err := i.bpfs.intersects(bhp.Props)
   400  		if err != nil {
   401  			i.err = errCorruptIndexEntry
   402  			return loadBlockFailed
   403  		}
   404  		if intersects == blockMaybeExcluded {
   405  			intersects = i.resolveMaybeExcluded(dir)
   406  		}
   407  		if intersects == blockExcluded {
   408  			i.maybeFilteredKeysSingleLevel = true
   409  			return loadBlockIrrelevant
   410  		}
   411  		// blockIntersects
   412  	}
   413  	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
   414  	block, err := i.reader.readBlock(
   415  		ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
   416  	if err != nil {
   417  		i.err = err
   418  		return loadBlockFailed
   419  	}
   420  	i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
   421  	if i.err != nil {
   422  		// The block is partially loaded, and we don't want it to appear valid.
   423  		i.data.invalidate()
   424  		return loadBlockFailed
   425  	}
   426  	i.initBounds()
   427  	return loadBlockOK
   428  }
   429  
   430  // readBlockForVBR implements the blockProviderWhenOpen interface for use by
   431  // the valueBlockReader.
   432  func (i *singleLevelIterator) readBlockForVBR(
   433  	h BlockHandle, stats *base.InternalIteratorStats,
   434  ) (bufferHandle, error) {
   435  	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
   436  	return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
   437  }
   438  
   439  // resolveMaybeExcluded is invoked when the block-property filterer has found
   440  // that a block is excluded according to its properties but only if its bounds
   441  // fall within the filter's current bounds.  This function consults the
    442  // appropriate bound, depending on the iteration direction, and returns either
   443  // `blockIntersects` or `blockMaybeExcluded`.
   444  func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
   445  	// TODO(jackson): We could first try comparing to top-level index block's
   446  	// key, and if within bounds avoid per-data block key comparisons.
   447  
   448  	// This iterator is configured with a bound-limited block property
   449  	// filter. The bpf determined this block could be excluded from
   450  	// iteration based on the property encoded in the block handle.
   451  	// However, we still need to determine if the block is wholly
   452  	// contained within the filter's key bounds.
   453  	//
   454  	// External guarantees ensure all the block's keys are ≥ the
   455  	// filter's lower bound during forward iteration, and that all the
   456  	// block's keys are < the filter's upper bound during backward
   457  	// iteration. We only need to determine if the opposite bound is
   458  	// also met.
   459  	//
   460  	// The index separator in index.Key() provides an inclusive
   461  	// upper-bound for the data block's keys, guaranteeing that all its
   462  	// keys are ≤ index.Key(). For forward iteration, this is all we
   463  	// need.
   464  	if dir > 0 {
   465  		// Forward iteration.
   466  		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
   467  			return blockExcluded
   468  		}
   469  		return blockIntersects
   470  	}
   471  
   472  	// Reverse iteration.
   473  	//
   474  	// Because we're iterating in the reverse direction, we don't yet have
   475  	// enough context available to determine if the block is wholly contained
   476  	// within its bounds. This case arises only during backward iteration,
   477  	// because of the way the index is structured.
   478  	//
   479  	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
   480  	// block with separator `c`. During reverse iteration, the guarantee that
   481  	// all the block's keys are < `d` is externally provided, but no guarantee
   482  	// is made on the bpf's lower bound. The separator `c` only provides an
   483  	// inclusive upper bound on the block's keys, indicating that the
   484  	// corresponding block handle points to a block containing only keys ≤ `c`.
   485  	//
   486  	// To establish a lower bound, we step the index backwards to read the
   487  	// previous block's separator, which provides an inclusive lower bound on
   488  	// the original block's keys. Afterwards, we step forward to restore our
   489  	// index position.
   490  	if peekKey, _ := i.index.Prev(); peekKey == nil {
   491  		// The original block points to the first block of this index block. If
   492  		// there's a two-level index, it could potentially provide a lower
   493  		// bound, but the code refactoring necessary to read it doesn't seem
   494  		// worth the payoff. We fall through to loading the block.
   495  	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
   496  		// The lower-bound on the original block falls within the filter's
   497  		// bounds, and we can skip the block (after restoring our current index
   498  		// position).
   499  		_, _ = i.index.Next()
   500  		return blockExcluded
   501  	}
   502  	_, _ = i.index.Next()
   503  	return blockIntersects
   504  }
   505  
   506  func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
   507  	if i.data.getFirstUserKey() == nil {
   508  		panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
   509  	}
   510  	i.blockLower = i.lower
   511  	if i.blockLower != nil {
   512  		firstUserKey := i.data.getFirstUserKey()
   513  		if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
   514  			// The lower-bound is less than the first key in the block. No need
   515  			// to check the lower-bound again for this block.
   516  			i.blockLower = nil
   517  		}
   518  	}
   519  	i.blockUpper = i.upper
   520  	if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
   521  		// The upper-bound is greater than the index key which itself is greater
   522  		// than or equal to every key in the block. No need to check the
   523  		// upper-bound again for this block.
   524  		i.blockUpper = nil
   525  	}
   526  }
   527  
   528  // The number of times to call Next/Prev in a block before giving up and seeking.
   529  // The value of 4 is arbitrary.
   530  // TODO(sumeer): experiment with dynamic adjustment based on the history of
   531  // seeks for a particular iterator.
   532  const numStepsBeforeSeek = 4
   533  
   534  func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
   535  	key []byte,
   536  ) (k *InternalKey, v base.LazyValue, done bool) {
   537  	k, v = i.data.Key(), i.data.value()
   538  	for j := 0; j < numStepsBeforeSeek; j++ {
   539  		curKeyCmp := i.cmp(k.UserKey, key)
   540  		if curKeyCmp >= 0 {
   541  			if i.blockUpper != nil {
   542  				cmp := i.cmp(k.UserKey, i.blockUpper)
   543  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   544  					i.exhaustedBounds = +1
   545  					return nil, base.LazyValue{}, true
   546  				}
   547  			}
   548  			return k, v, true
   549  		}
   550  		k, v = i.data.Next()
   551  		if k == nil {
   552  			break
   553  		}
   554  	}
   555  	return k, v, false
   556  }
   557  
   558  func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
   559  	key []byte,
   560  ) (k *InternalKey, v base.LazyValue, done bool) {
   561  	k, v = i.data.Key(), i.data.value()
   562  	for j := 0; j < numStepsBeforeSeek; j++ {
   563  		curKeyCmp := i.cmp(k.UserKey, key)
   564  		if curKeyCmp < 0 {
   565  			if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
   566  				i.exhaustedBounds = -1
   567  				return nil, base.LazyValue{}, true
   568  			}
   569  			return k, v, true
   570  		}
   571  		k, v = i.data.Prev()
   572  		if k == nil {
   573  			break
   574  		}
   575  	}
   576  	return k, v, false
   577  }
   578  
   579  func (i *singleLevelIterator) recordOffset() uint64 {
   580  	offset := i.dataBH.Offset
   581  	if i.data.valid() {
   582  		// - i.dataBH.Length/len(i.data.data) is the compression ratio. If
   583  		//   uncompressed, this is 1.
   584  		// - i.data.nextOffset is the uncompressed position of the current record
   585  		//   in the block.
   586  		// - i.dataBH.Offset is the offset of the block in the sstable before
   587  		//   decompression.
   588  		offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
   589  	} else {
   590  		// Last entry in the block must increment bytes iterated by the size of the block trailer
   591  		// and restart points.
   592  		offset += i.dataBH.Length + blockTrailerLen
   593  	}
   594  	return offset
   595  }
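
// A small worked example (hypothetical numbers, not pebble code) of the
// interpolation performed by recordOffset: the record's uncompressed position
// within the block is scaled by the block's compression ratio to estimate how
// many compressed bytes of the block have been consumed so far.
func recordOffsetSketch() uint64 {
	const (
		blockFileOffset    = 1000 // i.dataBH.Offset: start of the block within the sstable
		compressedLength   = 500  // i.dataBH.Length: on-disk (compressed) size of the block
		uncompressedLength = 1000 // len(i.data.data): size of the block after decompression
		recordOffset       = 400  // i.data.nextOffset: uncompressed position of the record
	)
	// 1000 + 400*500/1000 = 1200 bytes into the sstable.
	return blockFileOffset + (uint64(recordOffset)*compressedLength)/uncompressedLength
}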
   596  
   597  // SeekGE implements internalIterator.SeekGE, as documented in the pebble
   598  // package. Note that SeekGE only checks the upper bound. It is up to the
   599  // caller to ensure that key is greater than or equal to the lower bound.
   600  func (i *singleLevelIterator) SeekGE(
   601  	key []byte, flags base.SeekGEFlags,
   602  ) (*InternalKey, base.LazyValue) {
   603  	if i.vState != nil {
   604  		// Callers of SeekGE don't know about virtual sstable bounds, so we may
   605  		// have to internally restrict the bounds.
   606  		//
   607  		// TODO(bananabrick): We can optimize this check away for the level iter
   608  		// if necessary.
   609  		if i.cmp(key, i.lower) < 0 {
   610  			key = i.lower
   611  		}
   612  	}
   613  
   614  	if flags.TrySeekUsingNext() {
   615  		// The i.exhaustedBounds comparison indicates that the upper bound was
   616  		// reached. The i.data.isDataInvalidated() indicates that the sstable was
   617  		// exhausted.
   618  		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
   619  			// Already exhausted, so return nil.
   620  			return nil, base.LazyValue{}
   621  		}
   622  		if i.err != nil {
   623  			// The current iterator position cannot be used.
   624  			flags = flags.DisableTrySeekUsingNext()
   625  		}
   626  		// INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
   627  		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
   628  		// data-exhausted and bounds-exhausted, as defined earlier, are both
    629  		// false. This makes it safe to clear out i.exhaustedBounds and i.err
   630  		// before calling into seekGEHelper.
   631  	}
   632  
   633  	i.exhaustedBounds = 0
   634  	i.err = nil // clear cached iteration error
   635  	boundsCmp := i.boundsCmp
   636  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   637  	i.boundsCmp = 0
   638  	i.positionedUsingLatestBounds = true
   639  	return i.seekGEHelper(key, boundsCmp, flags)
   640  }
   641  
   642  // seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
   643  func (i *singleLevelIterator) seekGEHelper(
   644  	key []byte, boundsCmp int, flags base.SeekGEFlags,
   645  ) (*InternalKey, base.LazyValue) {
   646  	// Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1
   647  
   648  	// SeekGE performs various step-instead-of-seeking optimizations: eg enabled
   649  	// by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
   650  	// Care must be taken to ensure that when performing these optimizations and
    651  	// the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set appropriately.
   652  	// Consider a previous SeekGE that filtered keys from k until the current
   653  	// iterator position.
   654  	//
   655  	// If the previous SeekGE exhausted the iterator, it's possible keys greater
   656  	// than or equal to the current search key were filtered. We must not reuse
   657  	// the current iterator position without remembering the previous value of
   658  	// maybeFilteredKeys.
   659  
   660  	var dontSeekWithinBlock bool
   661  	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
   662  		boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
   663  		// Fast-path: The bounds have moved forward and this SeekGE is
   664  		// respecting the lower bound (guaranteed by Iterator). We know that
   665  		// the iterator must already be positioned within or just outside the
   666  		// previous bounds. Therefore it cannot be positioned at a block (or
   667  		// the position within that block) that is ahead of the seek position.
   668  		// However it can be positioned at an earlier block. This fast-path to
   669  		// use Next() on the block is only applied when we are already at the
   670  		// block that the slow-path (the else-clause) would load -- this is
   671  		// the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
   672  		// predicate.
   673  		i.initBoundsForAlreadyLoadedBlock()
   674  		ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
   675  		if done {
   676  			return ikey, val
   677  		}
   678  		if ikey == nil {
   679  			// Done with this block.
   680  			dontSeekWithinBlock = true
   681  		}
   682  	} else {
   683  		// Cannot use bounds monotonicity. But may be able to optimize if
   684  		// caller claimed externally known invariant represented by
   685  		// flags.TrySeekUsingNext().
   686  		if flags.TrySeekUsingNext() {
   687  			// seekPrefixGE or SeekGE has already ensured
   688  			// !i.data.isDataInvalidated() && i.exhaustedBounds != +1
   689  			currKey := i.data.Key()
   690  			value := i.data.value()
   691  			less := i.cmp(currKey.UserKey, key) < 0
   692  			// We could be more sophisticated and confirm that the seek
   693  			// position is within the current block before applying this
   694  			// optimization. But there may be some benefit even if it is in
   695  			// the next block, since we can avoid seeking i.index.
   696  			for j := 0; less && j < numStepsBeforeSeek; j++ {
   697  				currKey, value = i.Next()
   698  				if currKey == nil {
   699  					return nil, base.LazyValue{}
   700  				}
   701  				less = i.cmp(currKey.UserKey, key) < 0
   702  			}
   703  			if !less {
   704  				if i.blockUpper != nil {
   705  					cmp := i.cmp(currKey.UserKey, i.blockUpper)
   706  					if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   707  						i.exhaustedBounds = +1
   708  						return nil, base.LazyValue{}
   709  					}
   710  				}
   711  				return currKey, value
   712  			}
   713  		}
   714  
   715  		// Slow-path.
   716  		// Since we're re-seeking the iterator, the previous value of
   717  		// maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
   718  		// during seeking, loadBlock will set it to true.
   719  		i.maybeFilteredKeysSingleLevel = false
   720  
   721  		var ikey *InternalKey
   722  		if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
   723  			// The target key is greater than any key in the index block.
   724  			// Invalidate the block iterator so that a subsequent call to Prev()
   725  			// will return the last key in the table.
   726  			i.data.invalidate()
   727  			return nil, base.LazyValue{}
   728  		}
   729  		result := i.loadBlock(+1)
   730  		if result == loadBlockFailed {
   731  			return nil, base.LazyValue{}
   732  		}
   733  		if result == loadBlockIrrelevant {
    734  			// Enforce the upper bound here since we don't want to bother moving
   735  			// to the next block if upper bound is already exceeded. Note that
   736  			// the next block starts with keys >= ikey.UserKey since even
   737  			// though this is the block separator, the same user key can span
   738  			// multiple blocks. If upper is exclusive we use >= below, else
   739  			// we use >.
   740  			if i.upper != nil {
   741  				cmp := i.cmp(ikey.UserKey, i.upper)
   742  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   743  					i.exhaustedBounds = +1
   744  					return nil, base.LazyValue{}
   745  				}
   746  			}
   747  			// Want to skip to the next block.
   748  			dontSeekWithinBlock = true
   749  		}
   750  	}
   751  	if !dontSeekWithinBlock {
   752  		if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
   753  			if i.blockUpper != nil {
   754  				cmp := i.cmp(ikey.UserKey, i.blockUpper)
   755  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   756  					i.exhaustedBounds = +1
   757  					return nil, base.LazyValue{}
   758  				}
   759  			}
   760  			return ikey, val
   761  		}
   762  	}
   763  	return i.skipForward()
   764  }
   765  
   766  // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
   767  // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
   768  // to the caller to ensure that key is greater than or equal to the lower bound.
   769  func (i *singleLevelIterator) SeekPrefixGE(
   770  	prefix, key []byte, flags base.SeekGEFlags,
   771  ) (*base.InternalKey, base.LazyValue) {
   772  	if i.vState != nil {
   773  		// Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
   774  		// we may have to internally restrict the bounds.
   775  		//
   776  		// TODO(bananabrick): We can optimize away this check for the level iter
   777  		// if necessary.
   778  		if i.cmp(key, i.lower) < 0 {
   779  			key = i.lower
   780  		}
   781  	}
   782  	return i.seekPrefixGE(prefix, key, flags, i.useFilter)
   783  }
   784  
   785  func (i *singleLevelIterator) seekPrefixGE(
   786  	prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
   787  ) (k *InternalKey, value base.LazyValue) {
   788  	// NOTE: prefix is only used for bloom filter checking and not later work in
   789  	// this method. Hence, we can use the existing iterator position if the last
   790  	// SeekPrefixGE did not fail bloom filter matching.
   791  
   792  	err := i.err
   793  	i.err = nil // clear cached iteration error
   794  	if checkFilter && i.reader.tableFilter != nil {
   795  		if !i.lastBloomFilterMatched {
   796  			// Iterator is not positioned based on last seek.
   797  			flags = flags.DisableTrySeekUsingNext()
   798  		}
   799  		i.lastBloomFilterMatched = false
   800  		// Check prefix bloom filter.
   801  		var dataH bufferHandle
   802  		dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
   803  		if i.err != nil {
   804  			i.data.invalidate()
   805  			return nil, base.LazyValue{}
   806  		}
   807  		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
   808  		dataH.Release()
   809  		if !mayContain {
   810  			// This invalidation may not be necessary for correctness, and may
   811  			// be a place to optimize later by reusing the already loaded
   812  			// block. It was necessary in earlier versions of the code since
   813  			// the caller was allowed to call Next when SeekPrefixGE returned
   814  			// nil. This is no longer allowed.
   815  			i.data.invalidate()
   816  			return nil, base.LazyValue{}
   817  		}
   818  		i.lastBloomFilterMatched = true
   819  	}
   820  	if flags.TrySeekUsingNext() {
   821  		// The i.exhaustedBounds comparison indicates that the upper bound was
   822  		// reached. The i.data.isDataInvalidated() indicates that the sstable was
   823  		// exhausted.
   824  		if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
   825  			// Already exhausted, so return nil.
   826  			return nil, base.LazyValue{}
   827  		}
   828  		if err != nil {
   829  			// The current iterator position cannot be used.
   830  			flags = flags.DisableTrySeekUsingNext()
   831  		}
   832  		// INVARIANT: flags.TrySeekUsingNext() => err == nil &&
   833  		// !i.exhaustedBounds==+1 && !i.data.isDataInvalidated(). That is,
   834  		// data-exhausted and bounds-exhausted, as defined earlier, are both
    835  		// false. This makes it safe to clear out i.exhaustedBounds and i.err
   836  		// before calling into seekGEHelper.
   837  	}
   838  	// Bloom filter matches, or skipped, so this method will position the
   839  	// iterator.
   840  	i.exhaustedBounds = 0
   841  	boundsCmp := i.boundsCmp
   842  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   843  	i.boundsCmp = 0
   844  	i.positionedUsingLatestBounds = true
   845  	k, value = i.seekGEHelper(key, boundsCmp, flags)
   846  	return i.maybeVerifyKey(k, value)
   847  }
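
// A standalone sketch (assumed stand-in filter, not the actual table-filter
// plumbing) of the gate at the top of seekPrefixGE: a negative answer from the
// prefix bloom filter is definitive, so the iterator can return nil without
// reading any data block, while a positive answer (which may be a false
// positive) only means the real seek must proceed.
func prefixFilterGateSketch(
	mayContain func(prefix []byte) bool,
	prefix []byte,
	seek func() (*InternalKey, base.LazyValue),
) (*InternalKey, base.LazyValue) {
	if !mayContain(prefix) {
		// Definitely absent: skip the seek entirely.
		return nil, base.LazyValue{}
	}
	// Possibly present: fall through to a real positioning operation.
	return seek()
}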
   848  
    849  // virtualLast should only be called if i.vState != nil.
   850  func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
   851  	if i.vState == nil {
   852  		panic("pebble: invalid call to virtualLast")
   853  	}
   854  
    855  	// Seek to the first internal key with user key >= the upper bound.
   856  	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
   857  	if i.endKeyInclusive {
   858  		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
   859  		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
   860  		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
   861  		// the last key with userkey == i.upper.
   862  		//
   863  		// TODO(bananabrick): Think about how to improve this. If many internal keys
   864  		// with the same user key at the upper bound then this could be slow, but
   865  		// maybe the odds of having many internal keys with the same user key at the
   866  		// upper bound are low.
   867  		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
   868  			ikey, _ = i.Next()
   869  		}
   870  		return i.Prev()
   871  	}
   872  
   873  	// We seeked to the first key >= i.upper.
   874  	return i.Prev()
   875  }
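
// A toy sketch (hypothetical keys, not pebble code) of the inclusive-upper-bound
// handling in virtualLast: with an inclusive upper bound "c", we conceptually
// seek to the first entry >= "c", step over every entry whose user key equals
// "c", and then step back once, landing on the last "c" entry.
func virtualLastSketch() int {
	entries := []string{"b", "c", "c", "c", "d"} // user keys in iteration order
	upper := "c"                                 // inclusive upper bound
	// SeekGE(upper): first index whose key is >= upper.
	i := 0
	for i < len(entries) && entries[i] < upper {
		i++
	}
	// Next() past every entry with user key == upper.
	for i < len(entries) && entries[i] == upper {
		i++
	}
	// Prev(): the last in-bounds entry.
	return i - 1 // index 3, the last "c"
}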
   876  
   877  // SeekLT implements internalIterator.SeekLT, as documented in the pebble
   878  // package. Note that SeekLT only checks the lower bound. It is up to the
   879  // caller to ensure that key is less than or equal to the upper bound.
   880  func (i *singleLevelIterator) SeekLT(
   881  	key []byte, flags base.SeekLTFlags,
   882  ) (*InternalKey, base.LazyValue) {
   883  	if i.vState != nil {
   884  		// Might have to fix upper bound since virtual sstable bounds are not
   885  		// known to callers of SeekLT.
   886  		//
   887  		// TODO(bananabrick): We can optimize away this check for the level iter
   888  		// if necessary.
   889  		cmp := i.cmp(key, i.upper)
   890  		// key == i.upper is fine. We'll do the right thing and return the
   891  		// first internal key with user key < key.
   892  		if cmp > 0 {
   893  			// Return the last key in the virtual sstable.
   894  			return i.virtualLast()
   895  		}
   896  	}
   897  
   898  	i.exhaustedBounds = 0
   899  	i.err = nil // clear cached iteration error
   900  	boundsCmp := i.boundsCmp
   901  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   902  	i.boundsCmp = 0
   903  
   904  	// Seeking operations perform various step-instead-of-seeking optimizations:
   905  	// eg by considering monotonically increasing bounds (i.boundsCmp). Care
   906  	// must be taken to ensure that when performing these optimizations and the
   907  	// iterator becomes exhausted i.maybeFilteredKeysSingleLevel is set
   908  	// appropriately.  Consider a previous SeekLT that filtered keys from k
   909  	// until the current iterator position.
   910  	//
    911  	// If the previous SeekLT exhausted the iterator, it's possible keys
   912  	// less than the current search key were filtered. We must not reuse the
   913  	// current iterator position without remembering the previous value of
   914  	// maybeFilteredKeysSingleLevel.
   915  
   916  	i.positionedUsingLatestBounds = true
   917  
   918  	var dontSeekWithinBlock bool
   919  	if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
   920  		boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
   921  		// Fast-path: The bounds have moved backward, and this SeekLT is
   922  		// respecting the upper bound (guaranteed by Iterator). We know that
   923  		// the iterator must already be positioned within or just outside the
   924  		// previous bounds. Therefore it cannot be positioned at a block (or
   925  		// the position within that block) that is behind the seek position.
   926  		// However it can be positioned at a later block. This fast-path to
   927  		// use Prev() on the block is only applied when we are already at the
    928  		// block that can satisfy this seek -- this is the motivation for the
    929  		// i.cmp(i.data.getFirstUserKey(), key) < 0 predicate.
   930  		i.initBoundsForAlreadyLoadedBlock()
   931  		ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
   932  		if done {
   933  			return ikey, val
   934  		}
   935  		if ikey == nil {
   936  			// Done with this block.
   937  			dontSeekWithinBlock = true
   938  		}
   939  	} else {
   940  		// Slow-path.
   941  		i.maybeFilteredKeysSingleLevel = false
   942  		var ikey *InternalKey
   943  
   944  		// NB: If a bound-limited block property filter is configured, it's
   945  		// externally ensured that the filter is disabled (through returning
   946  		// Intersects=false irrespective of the block props provided) during
   947  		// seeks.
   948  		if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
   949  			ikey, _ = i.index.Last()
   950  			if ikey == nil {
   951  				return nil, base.LazyValue{}
   952  			}
   953  		}
   954  		// INVARIANT: ikey != nil.
   955  		result := i.loadBlock(-1)
   956  		if result == loadBlockFailed {
   957  			return nil, base.LazyValue{}
   958  		}
   959  		if result == loadBlockIrrelevant {
    960  			// Enforce the lower bound here since we don't want to bother moving
   961  			// to the previous block if lower bound is already exceeded. Note
   962  			// that the previous block starts with keys <= ikey.UserKey since
   963  			// even though this is the current block's separator, the same
   964  			// user key can span multiple blocks.
   965  			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
   966  				i.exhaustedBounds = -1
   967  				return nil, base.LazyValue{}
   968  			}
   969  			// Want to skip to the previous block.
   970  			dontSeekWithinBlock = true
   971  		}
   972  	}
   973  	if !dontSeekWithinBlock {
   974  		if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
   975  			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
   976  				i.exhaustedBounds = -1
   977  				return nil, base.LazyValue{}
   978  			}
   979  			return ikey, val
   980  		}
   981  	}
   982  	// The index contains separator keys which may lie between
   983  	// user-keys. Consider the user-keys:
   984  	//
   985  	//   complete
   986  	// ---- new block ---
   987  	//   complexion
   988  	//
   989  	// If these two keys end one block and start the next, the index key may
   990  	// be chosen as "compleu". The SeekGE in the index block will then point
   991  	// us to the block containing "complexion". If this happens, we want the
   992  	// last key from the previous data block.
   993  	return i.maybeVerifyKey(i.skipBackward())
   994  }
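
// A standalone toy (hypothetical keys, not pebble code) of the separator
// subtlety described at the end of SeekLT: the index separator "compleu" sits
// strictly between the last key of one block and the first key of the next, so
// the index SeekGE can land on a block whose keys are all >= the SeekLT target,
// and the answer must instead come from the previous block's last key, which is
// what skipBackward provides.
func separatorSketch() string {
	separators := []string{"compleu", "zzz"}           // one separator per data block
	blocks := [][]string{{"complete"}, {"complexion"}} // user keys per data block
	target := "complexion"
	// Index lookup: first block whose separator is >= target (index SeekGE).
	b := 0
	for b < len(separators) && separators[b] < target {
		b++
	}
	// SeekLT within that block: largest key < target.
	for j := len(blocks[b]) - 1; j >= 0; j-- {
		if blocks[b][j] < target {
			return blocks[b][j]
		}
	}
	// No key in block b is < target; take the previous block's last key, as
	// skipBackward does. (b > 0 holds for these inputs.)
	prev := blocks[b-1]
	return prev[len(prev)-1] // "complete"
}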
   995  
   996  // First implements internalIterator.First, as documented in the pebble
   997  // package. Note that First only checks the upper bound. It is up to the caller
   998  // to ensure that key is greater than or equal to the lower bound (e.g. via a
   999  // call to SeekGE(lower)).
  1000  func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
  1001  	// If the iterator was created on a virtual sstable, we will SeekGE to the
  1002  	// lower bound instead of using First, because First does not respect
  1003  	// bounds.
  1004  	if i.vState != nil {
  1005  		return i.SeekGE(i.lower, base.SeekGEFlagsNone)
  1006  	}
  1007  
  1008  	if i.lower != nil {
  1009  		panic("singleLevelIterator.First() used despite lower bound")
  1010  	}
  1011  	i.positionedUsingLatestBounds = true
  1012  	i.maybeFilteredKeysSingleLevel = false
  1013  
  1014  	return i.firstInternal()
  1015  }
  1016  
  1017  // firstInternal is a helper used for absolute positioning in a single-level
  1018  // index file, or for positioning in the second-level index in a two-level
  1019  // index file. For the latter, one cannot make any claims about absolute
  1020  // positioning.
  1021  func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
  1022  	i.exhaustedBounds = 0
  1023  	i.err = nil // clear cached iteration error
  1024  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1025  	i.boundsCmp = 0
  1026  
  1027  	var ikey *InternalKey
  1028  	if ikey, _ = i.index.First(); ikey == nil {
  1029  		i.data.invalidate()
  1030  		return nil, base.LazyValue{}
  1031  	}
  1032  	result := i.loadBlock(+1)
  1033  	if result == loadBlockFailed {
  1034  		return nil, base.LazyValue{}
  1035  	}
  1036  	if result == loadBlockOK {
  1037  		if ikey, val := i.data.First(); ikey != nil {
  1038  			if i.blockUpper != nil {
  1039  				cmp := i.cmp(ikey.UserKey, i.blockUpper)
  1040  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1041  					i.exhaustedBounds = +1
  1042  					return nil, base.LazyValue{}
  1043  				}
  1044  			}
  1045  			return ikey, val
  1046  		}
  1047  		// Else fall through to skipForward.
  1048  	} else {
  1049  		// result == loadBlockIrrelevant. Enforce the upper bound here since
   1050  		// we don't want to bother moving to the next block if upper bound is
  1051  		// already exceeded. Note that the next block starts with keys >=
  1052  		// ikey.UserKey since even though this is the block separator, the
  1053  		// same user key can span multiple blocks. If upper is exclusive we
  1054  		// use >= below, else we use >.
  1055  		if i.upper != nil {
  1056  			cmp := i.cmp(ikey.UserKey, i.upper)
  1057  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1058  				i.exhaustedBounds = +1
  1059  				return nil, base.LazyValue{}
  1060  			}
  1061  		}
  1062  		// Else fall through to skipForward.
  1063  	}
  1064  
  1065  	return i.skipForward()
  1066  }
  1067  
  1068  // Last implements internalIterator.Last, as documented in the pebble
  1069  // package. Note that Last only checks the lower bound. It is up to the caller
  1070  // to ensure that key is less than the upper bound (e.g. via a call to
  1071  // SeekLT(upper))
  1072  func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
  1073  	if i.vState != nil {
  1074  		return i.virtualLast()
  1075  	}
  1076  
  1077  	if i.upper != nil {
  1078  		panic("singleLevelIterator.Last() used despite upper bound")
  1079  	}
  1080  	i.positionedUsingLatestBounds = true
  1081  	i.maybeFilteredKeysSingleLevel = false
  1082  	return i.lastInternal()
  1083  }
  1084  
  1085  // lastInternal is a helper used for absolute positioning in a single-level
  1086  // index file, or for positioning in the second-level index in a two-level
  1087  // index file. For the latter, one cannot make any claims about absolute
  1088  // positioning.
  1089  func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
  1090  	i.exhaustedBounds = 0
  1091  	i.err = nil // clear cached iteration error
  1092  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1093  	i.boundsCmp = 0
  1094  
  1095  	var ikey *InternalKey
  1096  	if ikey, _ = i.index.Last(); ikey == nil {
  1097  		i.data.invalidate()
  1098  		return nil, base.LazyValue{}
  1099  	}
  1100  	result := i.loadBlock(-1)
  1101  	if result == loadBlockFailed {
  1102  		return nil, base.LazyValue{}
  1103  	}
  1104  	if result == loadBlockOK {
  1105  		if ikey, val := i.data.Last(); ikey != nil {
  1106  			if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
  1107  				i.exhaustedBounds = -1
  1108  				return nil, base.LazyValue{}
  1109  			}
  1110  			return ikey, val
  1111  		}
  1112  		// Else fall through to skipBackward.
  1113  	} else {
  1114  		// result == loadBlockIrrelevant. Enforce the lower bound here since
   1115  		// we don't want to bother moving to the previous block if lower bound is
  1116  		// already exceeded. Note that the previous block starts with keys <=
  1117  		// key.UserKey since even though this is the current block's
  1118  		// separator, the same user key can span multiple blocks.
  1119  		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
  1120  			i.exhaustedBounds = -1
  1121  			return nil, base.LazyValue{}
  1122  		}
  1123  	}
  1124  
  1125  	return i.skipBackward()
  1126  }
  1127  
  1128  // Next implements internalIterator.Next, as documented in the pebble
  1129  // package.
  1130  // Note: compactionIterator.Next mirrors the implementation of Iterator.Next
   1131  // for performance reasons. Keep the two in sync.
  1132  func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
  1133  	if i.exhaustedBounds == +1 {
  1134  		panic("Next called even though exhausted upper bound")
  1135  	}
  1136  	i.exhaustedBounds = 0
  1137  	i.maybeFilteredKeysSingleLevel = false
  1138  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1139  	i.boundsCmp = 0
  1140  
  1141  	if i.err != nil {
  1142  		// TODO(jackson): Can this case be turned into a panic? Once an error is
  1143  		// encountered, the iterator must be re-seeked.
  1144  		return nil, base.LazyValue{}
  1145  	}
  1146  	if key, val := i.data.Next(); key != nil {
  1147  		if i.blockUpper != nil {
  1148  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1149  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1150  				i.exhaustedBounds = +1
  1151  				return nil, base.LazyValue{}
  1152  			}
  1153  		}
  1154  		return key, val
  1155  	}
  1156  	return i.skipForward()
  1157  }
  1158  
  1159  // NextPrefix implements (base.InternalIterator).NextPrefix.
  1160  func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
  1161  	if i.exhaustedBounds == +1 {
  1162  		panic("NextPrefix called even though exhausted upper bound")
  1163  	}
  1164  	i.exhaustedBounds = 0
  1165  	i.maybeFilteredKeysSingleLevel = false
  1166  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1167  	i.boundsCmp = 0
  1168  	if i.err != nil {
  1169  		// TODO(jackson): Can this case be turned into a panic? Once an error is
  1170  		// encountered, the iterator must be re-seeked.
  1171  		return nil, base.LazyValue{}
  1172  	}
  1173  	if key, val := i.data.NextPrefix(succKey); key != nil {
  1174  		if i.blockUpper != nil {
  1175  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1176  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1177  				i.exhaustedBounds = +1
  1178  				return nil, base.LazyValue{}
  1179  			}
  1180  		}
  1181  		return key, val
  1182  	}
  1183  	// Did not find prefix in the existing data block. This is the slow-path
  1184  	// where we effectively seek the iterator.
  1185  	var ikey *InternalKey
  1186  	// The key is likely to be in the next data block, so try one step.
  1187  	if ikey, _ = i.index.Next(); ikey == nil {
  1188  		// The target key is greater than any key in the index block.
  1189  		// Invalidate the block iterator so that a subsequent call to Prev()
  1190  		// will return the last key in the table.
  1191  		i.data.invalidate()
  1192  		return nil, base.LazyValue{}
  1193  	}
  1194  	if i.cmp(succKey, ikey.UserKey) > 0 {
  1195  		// Not in the next data block, so seek the index.
  1196  		if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
  1197  			// The target key is greater than any key in the index block.
  1198  			// Invalidate the block iterator so that a subsequent call to Prev()
  1199  			// will return the last key in the table.
  1200  			i.data.invalidate()
  1201  			return nil, base.LazyValue{}
  1202  		}
  1203  	}
  1204  	result := i.loadBlock(+1)
  1205  	if result == loadBlockFailed {
  1206  		return nil, base.LazyValue{}
  1207  	}
  1208  	if result == loadBlockIrrelevant {
   1209  		// Enforce the upper bound here since we don't want to bother moving
  1210  		// to the next block if upper bound is already exceeded. Note that
  1211  		// the next block starts with keys >= ikey.UserKey since even
  1212  		// though this is the block separator, the same user key can span
  1213  		// multiple blocks. If upper is exclusive we use >= below, else we use
  1214  		// >.
  1215  		if i.upper != nil {
  1216  			cmp := i.cmp(ikey.UserKey, i.upper)
  1217  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1218  				i.exhaustedBounds = +1
  1219  				return nil, base.LazyValue{}
  1220  			}
  1221  		}
  1222  	} else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
  1223  		if i.blockUpper != nil {
  1224  			cmp := i.cmp(key.UserKey, i.blockUpper)
  1225  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1226  				i.exhaustedBounds = +1
  1227  				return nil, base.LazyValue{}
  1228  			}
  1229  		}
  1230  		return i.maybeVerifyKey(key, val)
  1231  	}
  1232  
  1233  	return i.skipForward()
  1234  }
  1235  
  1236  // Prev implements internalIterator.Prev, as documented in the pebble
  1237  // package.
  1238  func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
  1239  	if i.exhaustedBounds == -1 {
  1240  		panic("Prev called even though exhausted lower bound")
  1241  	}
  1242  	i.exhaustedBounds = 0
  1243  	i.maybeFilteredKeysSingleLevel = false
  1244  	// Seek optimization only applies until iterator is first positioned after SetBounds.
  1245  	i.boundsCmp = 0
  1246  
  1247  	if i.err != nil {
  1248  		return nil, base.LazyValue{}
  1249  	}
  1250  	if key, val := i.data.Prev(); key != nil {
  1251  		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
  1252  			i.exhaustedBounds = -1
  1253  			return nil, base.LazyValue{}
  1254  		}
  1255  		return key, val
  1256  	}
  1257  	return i.skipBackward()
  1258  }
  1259  
  1260  func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
  1261  	for {
  1262  		var key *InternalKey
  1263  		if key, _ = i.index.Next(); key == nil {
  1264  			i.data.invalidate()
  1265  			break
  1266  		}
  1267  		result := i.loadBlock(+1)
  1268  		if result != loadBlockOK {
  1269  			if i.err != nil {
  1270  				break
  1271  			}
  1272  			if result == loadBlockFailed {
  1273  				// We checked that i.index was at a valid entry, so
   1274  				// loadBlockFailed could not have happened due to i.index
  1275  				// being exhausted, and must be due to an error.
  1276  				panic("loadBlock should not have failed with no error")
  1277  			}
  1278  			// result == loadBlockIrrelevant. Enforce the upper bound here
   1279  			// since we don't want to bother moving to the next block if upper
  1280  			// bound is already exceeded. Note that the next block starts with
  1281  			// keys >= key.UserKey since even though this is the block
  1282  			// separator, the same user key can span multiple blocks. If upper
  1283  			// is exclusive we use >= below, else we use >.
  1284  			if i.upper != nil {
  1285  				cmp := i.cmp(key.UserKey, i.upper)
  1286  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1287  					i.exhaustedBounds = +1
  1288  					return nil, base.LazyValue{}
  1289  				}
  1290  			}
  1291  			continue
  1292  		}
  1293  		if key, val := i.data.First(); key != nil {
  1294  			if i.blockUpper != nil {
  1295  				cmp := i.cmp(key.UserKey, i.blockUpper)
  1296  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
  1297  					i.exhaustedBounds = +1
  1298  					return nil, base.LazyValue{}
  1299  				}
  1300  			}
  1301  			return i.maybeVerifyKey(key, val)
  1302  		}
  1303  	}
  1304  	return nil, base.LazyValue{}
  1305  }
  1306  
  1307  func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
  1308  	for {
  1309  		var key *InternalKey
  1310  		if key, _ = i.index.Prev(); key == nil {
  1311  			i.data.invalidate()
  1312  			break
  1313  		}
  1314  		result := i.loadBlock(-1)
  1315  		if result != loadBlockOK {
  1316  			if i.err != nil {
  1317  				break
  1318  			}
  1319  			if result == loadBlockFailed {
  1320  				// We checked that i.index was at a valid entry, so
   1321  				// loadBlockFailed could not have happened due to i.index
  1322  				// being exhausted, and must be due to an error.
  1323  				panic("loadBlock should not have failed with no error")
  1324  			}
  1325  			// result == loadBlockIrrelevant. Enforce the lower bound here
   1326  			// since we don't want to bother moving to the previous block if lower
  1327  			// bound is already exceeded. Note that the previous block starts with
  1328  			// keys <= key.UserKey since even though this is the current block's
  1329  			// separator, the same user key can span multiple blocks.
  1330  			if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
  1331  				i.exhaustedBounds = -1
  1332  				return nil, base.LazyValue{}
  1333  			}
  1334  			continue
  1335  		}
  1336  		key, val := i.data.Last()
  1337  		if key == nil {
  1338  			return nil, base.LazyValue{}
  1339  		}
  1340  		if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
  1341  			i.exhaustedBounds = -1
  1342  			return nil, base.LazyValue{}
  1343  		}
  1344  		return i.maybeVerifyKey(key, val)
  1345  	}
  1346  	return nil, base.LazyValue{}
  1347  }
  1348  
  1349  // Error implements internalIterator.Error, as documented in the pebble
  1350  // package.
  1351  func (i *singleLevelIterator) Error() error {
  1352  	if err := i.data.Error(); err != nil {
  1353  		return err
  1354  	}
  1355  	return i.err
  1356  }
  1357  
  1358  // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
  1359  // whether or not the last positioning method may have skipped any keys due to
  1360  // block-property filters.
  1361  func (i *singleLevelIterator) MaybeFilteredKeys() bool {
  1362  	return i.maybeFilteredKeysSingleLevel
  1363  }
  1364  
  1365  // SetCloseHook sets a function that will be called when the iterator is
  1366  // closed.
  1367  func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
  1368  	i.closeHook = fn
  1369  }
  1370  
  1371  func firstError(err0, err1 error) error {
  1372  	if err0 != nil {
  1373  		return err0
  1374  	}
  1375  	return err1
  1376  }
  1377  
  1378  // Close implements internalIterator.Close, as documented in the pebble
  1379  // package.
  1380  func (i *singleLevelIterator) Close() error {
  1381  	i.iterStats.close()
  1382  	var err error
  1383  	if i.closeHook != nil {
  1384  		err = firstError(err, i.closeHook(i))
  1385  	}
  1386  	err = firstError(err, i.data.Close())
  1387  	err = firstError(err, i.index.Close())
  1388  	if i.dataRH != nil {
  1389  		err = firstError(err, i.dataRH.Close())
  1390  		i.dataRH = nil
  1391  	}
  1392  	err = firstError(err, i.err)
  1393  	if i.bpfs != nil {
  1394  		releaseBlockPropertiesFilterer(i.bpfs)
  1395  	}
  1396  	if i.vbReader != nil {
  1397  		i.vbReader.close()
  1398  	}
  1399  	if i.vbRH != nil {
  1400  		err = firstError(err, i.vbRH.Close())
  1401  		i.vbRH = nil
  1402  	}
  1403  	*i = i.resetForReuse()
  1404  	singleLevelIterPool.Put(i)
  1405  	return err
  1406  }
  1407  
  1408  func (i *singleLevelIterator) String() string {
  1409  	if i.vState != nil {
  1410  		return i.vState.fileNum.String()
  1411  	}
  1412  	return i.reader.fileNum.String()
  1413  }