github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/reader_iter_two_lvl.go (about)

     1  // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  
    11  	"github.com/cockroachdb/pebble/internal/base"
    12  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
    13  )
    14  
        // twoLevelIterator iterates over a table whose index has two levels:
        // topLevelIndex is positioned over index blocks, and the embedded
        // singleLevelIterator operates on the index block that loadIndex most
        // recently loaded from the current topLevelIndex position.
    15  type twoLevelIterator struct {
    16  	singleLevelIterator
    17  	// maybeFilteredKeysTwoLevel indicates whether the last iterator
    18  	// positioning operation may have skipped any index blocks due to
    19  	// block-property filters when positioning the top-level-index.
    20  	maybeFilteredKeysTwoLevel bool
        	// topLevelIndex iterates over the top-level index block; each of its
        	// entries is decoded by loadIndex as a block handle with properties.
    21  	topLevelIndex             blockIter
    22  }
    23  
    24  // Compile-time check that *twoLevelIterator implements base.InternalIterator.
    25  var _ base.InternalIterator = (*twoLevelIterator)(nil)
    26  
    27  // loadIndex loads the index block at the current top level index position and
    28  // leaves i.index unpositioned. If unsuccessful, it gets i.err to any error
    29  // encountered, which may be nil if we have simply exhausted the entire table.
    30  // This is used for two level indexes.
    31  func (i *twoLevelIterator) loadIndex(dir int8) loadBlockResult {
    32  	// Ensure the index data block iterators are invalidated even if loading of
    33  	// the index fails.
    34  	i.data.invalidate()
    35  	i.index.invalidate()
    36  	if !i.topLevelIndex.valid() {
    37  		i.index.offset = 0
    38  		i.index.restarts = 0
    39  		return loadBlockFailed
    40  	}
    41  	v := i.topLevelIndex.value()
    42  	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
    43  	if err != nil {
    44  		i.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry")
    45  		return loadBlockFailed
    46  	}
    47  	if i.bpfs != nil {
    48  		intersects, err := i.bpfs.intersects(bhp.Props)
    49  		if err != nil {
    50  			i.err = errCorruptIndexEntry
    51  			return loadBlockFailed
    52  		}
    53  		if intersects == blockMaybeExcluded {
    54  			intersects = i.resolveMaybeExcluded(dir)
    55  		}
    56  		if intersects == blockExcluded {
    57  			i.maybeFilteredKeysTwoLevel = true
    58  			return loadBlockIrrelevant
    59  		}
    60  		// blockIntersects
    61  	}
    62  	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.MetadataBlock)
    63  	indexBlock, err := i.reader.readBlock(
    64  		ctx, bhp.BlockHandle, nil /* transform */, nil /* readHandle */, i.stats, &i.iterStats, i.bufferPool)
    65  	if err != nil {
    66  		i.err = err
    67  		return loadBlockFailed
    68  	}
    69  	if i.err = i.index.initHandle(i.cmp, indexBlock, i.reader.Properties.GlobalSeqNum, false); i.err == nil {
    70  		return loadBlockOK
    71  	}
    72  	return loadBlockFailed
    73  }
    74  
    75  // resolveMaybeExcluded is invoked when the block-property filterer has found
    76  // that an index block is excluded according to its properties but only if its
    77  // bounds fall within the filter's current bounds. This function consults the
    78  // apprioriate bound, depending on the iteration direction, and returns either
    79  // `blockIntersects` or
    80  // `blockMaybeExcluded`.
    81  func (i *twoLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
    82  	// This iterator is configured with a bound-limited block property filter.
    83  	// The bpf determined this entire index block could be excluded from
    84  	// iteration based on the property encoded in the block handle. However, we
    85  	// still need to determine if the index block is wholly contained within the
    86  	// filter's key bounds.
    87  	//
    88  	// External guarantees ensure all its data blocks' keys are ≥ the filter's
    89  	// lower bound during forward iteration, and that all its data blocks' keys
    90  	// are < the filter's upper bound during backward iteration. We only need to
    91  	// determine if the opposite bound is also met.
    92  	//
    93  	// The index separator in topLevelIndex.Key() provides an inclusive
    94  	// upper-bound for the index block's keys, guaranteeing that all its keys
    95  	// are ≤ topLevelIndex.Key(). For forward iteration, this is all we need.
    96  	if dir > 0 {
    97  		// Forward iteration.
    98  		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.topLevelIndex.Key().UserKey) {
    99  			return blockExcluded
   100  		}
   101  		return blockIntersects
   102  	}
   103  
   104  	// Reverse iteration.
   105  	//
   106  	// Because we're iterating in the reverse direction, we don't yet have
   107  	// enough context available to determine if the block is wholly contained
   108  	// within its bounds. This case arises only during backward iteration,
   109  	// because of the way the index is structured.
   110  	//
   111  	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
   112  	// block with separator `c`. During reverse iteration, the guarantee that
   113  	// all the block's keys are < `d` is externally provided, but no guarantee
   114  	// is made on the bpf's lower bound. The separator `c` only provides an
   115  	// inclusive upper bound on the block's keys, indicating that the
   116  	// corresponding block handle points to a block containing only keys ≤ `c`.
   117  	//
   118  	// To establish a lower bound, we step the top-level index backwards to read
   119  	// the previous block's separator, which provides an inclusive lower bound
   120  	// on the original index block's keys. Afterwards, we step forward to
   121  	// restore our top-level index position.
   122  	if peekKey, _ := i.topLevelIndex.Prev(); peekKey == nil {
   123  		// The original block points to the first index block of this table. If
   124  		// we knew the lower bound for the entire table, it could provide a
   125  		// lower bound, but the code refactoring necessary to read it doesn't
   126  		// seem worth the payoff. We fall through to loading the block.
   127  	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
   128  		// The lower-bound on the original index block falls within the filter's
   129  		// bounds, and we can skip the block (after restoring our current
   130  		// top-level index position).
   131  		_, _ = i.topLevelIndex.Next()
   132  		return blockExcluded
   133  	}
   134  	_, _ = i.topLevelIndex.Next()
   135  	return blockIntersects
   136  }
   137  
   138  // Note that lower, upper passed into init has nothing to do with virtual sstable
   139  // bounds. If the virtualState passed in is not nil, then virtual sstable bounds
   140  // will be enforced.
   141  func (i *twoLevelIterator) init(
   142  	ctx context.Context,
   143  	r *Reader,
   144  	v *virtualState,
   145  	lower, upper []byte,
   146  	filterer *BlockPropertiesFilterer,
   147  	useFilter, hideObsoletePoints bool,
   148  	stats *base.InternalIteratorStats,
   149  	categoryAndQoS CategoryAndQoS,
   150  	statsCollector *CategoryStatsCollector,
   151  	rp ReaderProvider,
   152  	bufferPool *BufferPool,
   153  ) error {
   154  	if r.err != nil {
   155  		return r.err
   156  	}
   157  	i.iterStats.init(categoryAndQoS, statsCollector)
   158  	topLevelIndexH, err := r.readIndex(ctx, stats, &i.iterStats)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	if v != nil {
   163  		i.vState = v
   164  		// Note that upper is exclusive here.
   165  		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
   166  	}
   167  
   168  	i.ctx = ctx
   169  	i.lower = lower
   170  	i.upper = upper
   171  	i.bpfs = filterer
   172  	i.useFilter = useFilter
   173  	i.reader = r
   174  	i.cmp = r.Compare
   175  	i.stats = stats
   176  	i.hideObsoletePoints = hideObsoletePoints
   177  	i.bufferPool = bufferPool
   178  	err = i.topLevelIndex.initHandle(i.cmp, topLevelIndexH, r.Properties.GlobalSeqNum, false)
   179  	if err != nil {
   180  		// blockIter.Close releases topLevelIndexH and always returns a nil error
   181  		_ = i.topLevelIndex.Close()
   182  		return err
   183  	}
   184  	i.dataRH = r.readable.NewReadHandle(ctx)
   185  	if r.tableFormat >= TableFormatPebblev3 {
   186  		if r.Properties.NumValueBlocks > 0 {
   187  			i.vbReader = &valueBlockReader{
   188  				bpOpen: i,
   189  				rp:     rp,
   190  				vbih:   r.valueBIH,
   191  				stats:  stats,
   192  			}
   193  			i.data.lazyValueHandling.vbr = i.vbReader
   194  			i.vbRH = r.readable.NewReadHandle(ctx)
   195  		}
   196  		i.data.lazyValueHandling.hasValuePrefix = true
   197  	}
   198  	return nil
   199  }
   200  
   201  func (i *twoLevelIterator) String() string {
   202  	if i.vState != nil {
   203  		return i.vState.fileNum.String()
   204  	}
   205  	return i.reader.fileNum.String()
   206  }
   207  
   208  // MaybeFilteredKeys may be called when an iterator is exhausted to indicate
   209  // whether or not the last positioning method may have skipped any keys due to
   210  // block-property filters.
   211  func (i *twoLevelIterator) MaybeFilteredKeys() bool {
   212  	// While reading sstables with two-level indexes, knowledge of whether we've
   213  	// filtered keys is tracked separately for each index level. The
   214  	// seek-using-next optimizations have different criteria. We can only reset
   215  	// maybeFilteredKeys back to false during a seek when NOT using the
   216  	// fast-path that uses the current iterator position.
   217  	//
   218  	// If either level might have filtered keys to arrive at the current
   219  	// iterator position, return MaybeFilteredKeys=true.
   220  	return i.maybeFilteredKeysTwoLevel || i.maybeFilteredKeysSingleLevel
   221  }
   222  
   223  // SeekGE implements internalIterator.SeekGE, as documented in the pebble
   224  // package. Note that SeekGE only checks the upper bound. It is up to the
   225  // caller to ensure that key is greater than or equal to the lower bound.
        //
        // When the iterator has virtual sstable state (i.vState != nil), a key that
        // sorts before i.lower is replaced by i.lower before seeking. A nil key is
        // returned, among other cases, when the top-level index has no entry at or
        // after key or when loadIndex reports loadBlockFailed.
   226  func (i *twoLevelIterator) SeekGE(
   227  	key []byte, flags base.SeekGEFlags,
   228  ) (*InternalKey, base.LazyValue) {
   229  	if i.vState != nil {
   230  		// Callers of SeekGE don't know about virtual sstable bounds, so we may
   231  		// have to internally restrict the bounds.
   232  		//
   233  		// TODO(bananabrick): We can optimize away this check for the level iter
   234  		// if necessary.
   235  		if i.cmp(key, i.lower) < 0 {
   236  			key = i.lower
   237  		}
   238  	}
   239  
        	// Remember the error left by the previous operation: a non-nil value
        	// disables the already-exhausted shortcut and forces the slow path below.
   240  	err := i.err
   241  	i.err = nil // clear cached iteration error
   242  
   243  	// The twoLevelIterator could be already exhausted. Utilize that when
   244  	// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
   245  	// bounds-exhausted near the top of the file.
   246  	if flags.TrySeekUsingNext() &&
   247  		(i.exhaustedBounds == +1 || (i.data.isDataInvalidated() && i.index.isDataInvalidated())) &&
   248  		err == nil {
   249  		// Already exhausted, so return nil.
   250  		return nil, base.LazyValue{}
   251  	}
   252  
   253  	// SeekGE performs various step-instead-of-seeking optimizations: eg enabled
   254  	// by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
   255  	// Care must be taken to ensure that when performing these optimizations and
   256  	// the iterator becomes exhausted, i.maybeFilteredKeysTwoLevel is set
        	// appropriately.
   257  	// Consider a previous SeekGE that filtered keys from k until the current
   258  	// iterator position.
   259  	//
   260  	// If the previous SeekGE exhausted the iterator while seeking within the
   261  	// two-level index, it's possible keys greater than or equal to the current
   262  	// search key were filtered through skipped index blocks. We must not reuse
   263  	// the position of the two-level index iterator without remembering the
   264  	// previous value of maybeFilteredKeysTwoLevel.
   265  
   266  	// We fall into the slow path if i.index.isDataInvalidated() even if the
   267  	// top-level iterator is already positioned correctly and all other
   268  	// conditions are met. An alternative structure could reuse topLevelIndex's
   269  	// current position and reload the index block to which it points. Arguably,
   270  	// an index block load is expensive and the index block may still be earlier
   271  	// than the index block containing the sought key, resulting in a wasteful
   272  	// block load.
   273  
   274  	var dontSeekWithinSingleLevelIter bool
   275  	if i.topLevelIndex.isDataInvalidated() || !i.topLevelIndex.valid() || i.index.isDataInvalidated() || err != nil ||
   276  		(i.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.cmp(key, i.topLevelIndex.Key().UserKey) > 0 {
   277  		// Slow-path: need to position the topLevelIndex.
   278  
   279  		// The previous exhausted state of singleLevelIterator is no longer
   280  		// relevant, since we may be moving to a different index block.
   281  		i.exhaustedBounds = 0
   282  		i.maybeFilteredKeysTwoLevel = false
   283  		flags = flags.DisableTrySeekUsingNext()
   284  		var ikey *InternalKey
   285  		if ikey, _ = i.topLevelIndex.SeekGE(key, flags); ikey == nil {
   286  			i.data.invalidate()
   287  			i.index.invalidate()
   288  			return nil, base.LazyValue{}
   289  		}
   290  
   291  		result := i.loadIndex(+1)
   292  		if result == loadBlockFailed {
   293  			i.boundsCmp = 0
   294  			return nil, base.LazyValue{}
   295  		}
   296  		if result == loadBlockIrrelevant {
   297  			// Enforce the upper bound here since we don't want to bother moving
   298  			// to the next entry in the top level index if upper bound is
   299  			// already exceeded. Note that the next entry starts with keys >=
   300  			// ikey.UserKey since even though this is the block separator, the
   301  			// same user key can span multiple index blocks. If upper is
   302  			// exclusive we use >= below, else we use >.
   303  			if i.upper != nil {
   304  				cmp := i.cmp(ikey.UserKey, i.upper)
   305  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   306  					i.exhaustedBounds = +1
   307  				}
   308  			}
   309  			// Fall through to skipForward.
   310  			dontSeekWithinSingleLevelIter = true
   311  			// Clear boundsCmp.
   312  			//
   313  			// In the typical cases where dontSeekWithinSingleLevelIter=false,
   314  			// the singleLevelIterator.SeekGE call will clear boundsCmp.
   315  			// However, in this case where dontSeekWithinSingleLevelIter=true,
   316  			// we never seek on the single-level iterator. This call will fall
   317  			// through to skipForward, which may improperly leave boundsCmp=+1
   318  			// unless we clear it here.
   319  			i.boundsCmp = 0
   320  		}
   321  	} else {
   322  		// INVARIANT: err == nil.
   323  		//
   324  		// Else fast-path: There are two possible cases, from
   325  		// (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
   326  		//
   327  		// 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
   328  		// respecting the lower bound (guaranteed by Iterator). We know that the
   329  		// iterator must already be positioned within or just outside the previous
   330  		// bounds. Therefore, the topLevelIndex iter cannot be positioned at an
   331  		// entry ahead of the seek position (though it can be positioned behind).
   332  		// The !(i.cmp(key, i.topLevelIndex.Key().UserKey) > 0) check confirms that
   333  		// it is not behind. Since it is not ahead and not behind it must be at the
   334  		// right position.
   335  		//
   336  		// 2) This SeekGE will land on a key that is greater than the key we are
   337  		// currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
   338  		// i.topLevelIndex.Key().UserKey) <= 0, we are at the correct lower level
   339  		// index block. No need to reset the state of singleLevelIterator.
   340  		//
   341  		// Note that cases 1 and 2 never overlap, and one of them must be true,
   342  		// but we have some test code (TestIterRandomizedMaybeFilteredKeys) that
   343  		// sets both to true, so we fix things here and then do an invariant
   344  		// check.
   345  		//
   346  		// This invariant checking is important enough that we do not gate it
   347  		// behind invariants.Enabled.
   348  		if i.boundsCmp > 0 {
   349  			// TODO(sumeer): fix TestIterRandomizedMaybeFilteredKeys so as to not
   350  			// need this behavior.
   351  			flags = flags.DisableTrySeekUsingNext()
   352  		}
   353  		if i.boundsCmp > 0 == flags.TrySeekUsingNext() {
   354  			panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
   355  				i.boundsCmp > 0, flags.TrySeekUsingNext()))
   356  		}
   357  
   358  		if !flags.TrySeekUsingNext() {
   359  			// Case 1. Bounds have changed so the previous exhausted bounds state is
   360  			// irrelevant.
   361  			// WARNING-data-exhausted: this is safe to do only because the monotonic
   362  			// bounds optimizations only work when !data-exhausted. If they also
   363  			// worked with data-exhausted, we have made it unclear whether
   364  			// data-exhausted is actually true. See the comment at the top of the
   365  			// file.
   366  			i.exhaustedBounds = 0
   367  		}
   368  		// Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
   369  		// preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
   370  		// bug https://github.com/cockroachdb/pebble/issues/2036.
   371  	}
   372  
   373  	if !dontSeekWithinSingleLevelIter {
   374  		// Note that while trySeekUsingNext could be false here, singleLevelIterator
   375  		// could do its own boundsCmp-based optimization to seek using next.
   376  		if ikey, val := i.singleLevelIterator.SeekGE(key, flags); ikey != nil {
   377  			return ikey, val
   378  		}
   379  	}
        	// Either the index block was filtered out or the seek within it found
        	// nothing; continue from the following top-level index entries.
   380  	return i.skipForward()
   381  }
   382  
   383  // SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
   384  // pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
   385  // to the caller to ensure that key is greater than or equal to the lower bound.
        //
        // When the reader has a table filter and i.useFilter is set, the filter is
        // consulted with prefix first; if it reports that the prefix cannot be
        // present, a nil key is returned without positioning the index.
   386  func (i *twoLevelIterator) SeekPrefixGE(
   387  	prefix, key []byte, flags base.SeekGEFlags,
   388  ) (*base.InternalKey, base.LazyValue) {
   389  	if i.vState != nil {
   390  		// Callers of SeekPrefixGE don't know about virtual sstable bounds, so we
   391  		// may have to internally restrict the bounds.
   392  		//
   393  		// TODO(bananabrick): We can optimize away this check for the level iter
   394  		// if necessary.
   395  		if i.cmp(key, i.lower) < 0 {
   396  			key = i.lower
   397  		}
   398  	}
   399  
   400  	// NOTE: prefix is only used for bloom filter checking and not later work in
   401  	// this method. Hence, we can use the existing iterator position if the last
   402  	// SeekPrefixGE did not fail bloom filter matching.
   403  
   404  	err := i.err
   405  	i.err = nil // clear cached iteration error
   406  
   407  	// The twoLevelIterator could be already exhausted. Utilize that when
   408  	// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
   409  	// bounds-exhausted near the top of the file.
   410  	filterUsedAndDidNotMatch :=
   411  		i.reader.tableFilter != nil && i.useFilter && !i.lastBloomFilterMatched
   412  	if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
   413  		(i.exhaustedBounds == +1 || (i.data.isDataInvalidated() && i.index.isDataInvalidated())) &&
   414  		err == nil {
   415  		// Already exhausted, so return nil.
   416  		return nil, base.LazyValue{}
   417  	}
   418  
   419  	// Check prefix bloom filter.
   420  	if i.reader.tableFilter != nil && i.useFilter {
   421  		if !i.lastBloomFilterMatched {
   422  			// Iterator is not positioned based on last seek.
   423  			flags = flags.DisableTrySeekUsingNext()
   424  		}
   425  		i.lastBloomFilterMatched = false
   426  		var dataH bufferHandle
   427  		dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
   428  		if i.err != nil {
   429  			i.data.invalidate()
   430  			return nil, base.LazyValue{}
   431  		}
   432  		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
   433  		dataH.Release()
   434  		if !mayContain {
   435  			// This invalidation may not be necessary for correctness, and may
   436  			// be a place to optimize later by reusing the already loaded
   437  			// block. It was necessary in earlier versions of the code since
   438  			// the caller was allowed to call Next when SeekPrefixGE returned
   439  			// nil. This is no longer allowed.
   440  			i.data.invalidate()
   441  			return nil, base.LazyValue{}
   442  		}
   443  		i.lastBloomFilterMatched = true
   444  	}
   445  
   446  	// Bloom filter matches.
   447  
   448  	// SeekPrefixGE performs various step-instead-of-seeking optimizations: eg
   449  	// enabled by trySeekUsingNext, or by monotonically increasing bounds
   450  	// (i.boundsCmp). Care must be taken to ensure that when performing these
   451  	// optimizations and the iterator becomes exhausted,
   452  	// i.maybeFilteredKeysTwoLevel is set appropriately. Consider a previous
   453  	// SeekPrefixGE that filtered keys from k until the current iterator
   454  	// position.
   455  	//
   456  	// If the previous SeekPrefixGE exhausted the iterator while seeking within
   457  	// the two-level index, it's possible keys greater than or equal to the
   458  	// current search key were filtered through skipped index blocks. We must
   459  	// not reuse the position of the two-level index iterator without
   460  	// remembering the previous value of maybeFilteredKeysTwoLevel.
   461  
   462  	// We fall into the slow path if i.index.isDataInvalidated() even if the
   463  	// top-level iterator is already positioned correctly and all other
   464  	// conditions are met. An alternative structure could reuse topLevelIndex's
   465  	// current position and reload the index block to which it points. Arguably,
   466  	// an index block load is expensive and the index block may still be earlier
   467  	// than the index block containing the sought key, resulting in a wasteful
   468  	// block load.
   469  
   470  	var dontSeekWithinSingleLevelIter bool
   471  	if i.topLevelIndex.isDataInvalidated() || !i.topLevelIndex.valid() || i.index.isDataInvalidated() || err != nil ||
   472  		(i.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.cmp(key, i.topLevelIndex.Key().UserKey) > 0 {
   473  		// Slow-path: need to position the topLevelIndex.
   474  
   475  		// The previous exhausted state of singleLevelIterator is no longer
   476  		// relevant, since we may be moving to a different index block.
   477  		i.exhaustedBounds = 0
   478  		i.maybeFilteredKeysTwoLevel = false
   479  		flags = flags.DisableTrySeekUsingNext()
   480  		var ikey *InternalKey
   481  		if ikey, _ = i.topLevelIndex.SeekGE(key, flags); ikey == nil {
   482  			i.data.invalidate()
   483  			i.index.invalidate()
   484  			return nil, base.LazyValue{}
   485  		}
   486  
   487  		result := i.loadIndex(+1)
   488  		if result == loadBlockFailed {
   489  			i.boundsCmp = 0
   490  			return nil, base.LazyValue{}
   491  		}
   492  		if result == loadBlockIrrelevant {
   493  			// Enforce the upper bound here since we don't want to bother moving
   494  			// to the next entry in the top level index if upper bound is
   495  			// already exceeded. Note that the next entry starts with keys >=
   496  			// ikey.UserKey since even though this is the block separator, the
   497  			// same user key can span multiple index blocks. If upper is
   498  			// exclusive we use >= below, else we use >.
   499  			if i.upper != nil {
   500  				cmp := i.cmp(ikey.UserKey, i.upper)
   501  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   502  					i.exhaustedBounds = +1
   503  				}
   504  			}
   505  			// Fall through to skipForward.
   506  			dontSeekWithinSingleLevelIter = true
   507  			// Clear boundsCmp.
   508  			//
   509  			// In the typical cases where dontSeekWithinSingleLevelIter=false,
   510  			// the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
   511  			// However, in this case where dontSeekWithinSingleLevelIter=true,
   512  			// we never seek on the single-level iterator. This call will fall
   513  			// through to skipForward, which may improperly leave boundsCmp=+1
   514  			// unless we clear it here.
   515  			i.boundsCmp = 0
   516  		}
   517  	} else {
   518  		// INVARIANT: err == nil.
   519  		//
   520  		// Else fast-path: There are two possible cases, from
   521  		// (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
   522  		//
   523  		// 1) The bounds have moved forward (i.boundsCmp > 0) and this
   524  		// SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
   525  		// know that the iterator must already be positioned within or just
   526  		// outside the previous bounds. Therefore, the topLevelIndex iter cannot
   527  		// be positioned at an entry ahead of the seek position (though it can be
   528  		// positioned behind). The !(i.cmp(key, i.topLevelIndex.Key().UserKey) > 0)
   529  		// check confirms that it is not behind. Since it is not ahead and not
   530  		// behind it must be at the right position.
   531  		//
   532  		// 2) This SeekPrefixGE will land on a key that is greater than the key we
   533  		// are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
   534  		// i.topLevelIndex.Key().UserKey) <= 0, we are at the correct lower level
   535  		// index block. No need to reset the state of singleLevelIterator.
   536  		//
   537  		// Note that cases 1 and 2 never overlap, and one of them must be true.
   538  		// This invariant checking is important enough that we do not gate it
   539  		// behind invariants.Enabled.
   540  		if i.boundsCmp > 0 == flags.TrySeekUsingNext() {
   541  			panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
   542  				i.boundsCmp > 0, flags.TrySeekUsingNext()))
   543  		}
   544  
   545  		if !flags.TrySeekUsingNext() {
   546  			// Case 1. Bounds have changed so the previous exhausted bounds state is
   547  			// irrelevant.
   548  			// WARNING-data-exhausted: this is safe to do only because the monotonic
   549  			// bounds optimizations only work when !data-exhausted. If they also
   550  			// worked with data-exhausted, we have made it unclear whether
   551  			// data-exhausted is actually true. See the comment at the top of the
   552  			// file.
   553  			i.exhaustedBounds = 0
   554  		}
   555  		// Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
   556  		// preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
   557  		// bug https://github.com/cockroachdb/pebble/issues/2036.
   558  	}
   559  
   560  	if !dontSeekWithinSingleLevelIter {
        		// The bloom filter was already consulted above, so the single-level
        		// seek is told not to check it again.
   561  		if ikey, val := i.singleLevelIterator.seekPrefixGE(
   562  			prefix, key, flags, false /* checkFilter */); ikey != nil {
   563  			return ikey, val
   564  		}
   565  	}
   566  	// NB: skipForward checks whether exhaustedBounds is already +1.
   567  	return i.skipForward()
   568  }
   569  
   570  // virtualLast should only be called if i.vReader != nil and i.endKeyInclusive
   571  // is true.
   572  func (i *twoLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
   573  	if i.vState == nil {
   574  		panic("pebble: invalid call to virtualLast")
   575  	}
   576  
   577  	// Seek to the first internal key.
   578  	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
   579  	if i.endKeyInclusive {
   580  		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
   581  		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
   582  		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
   583  		// the last key with userkey == i.upper.
   584  		//
   585  		// TODO(bananabrick): Think about how to improve this. If many internal keys
   586  		// with the same user key at the upper bound then this could be slow, but
   587  		// maybe the odds of having many internal keys with the same user key at the
   588  		// upper bound are low.
   589  		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
   590  			ikey, _ = i.Next()
   591  		}
   592  		return i.Prev()
   593  	}
   594  	// We seeked to the first key >= i.upper.
   595  	return i.Prev()
   596  }
   597  
   598  // SeekLT implements internalIterator.SeekLT, as documented in the pebble
   599  // package. Note that SeekLT only checks the lower bound. It is up to the
   600  // caller to ensure that key is less than the upper bound.
        //
        // When the iterator has virtual sstable state (i.vState != nil) and key sorts
        // after i.upper, the call is delegated to virtualLast.
   601  func (i *twoLevelIterator) SeekLT(
   602  	key []byte, flags base.SeekLTFlags,
   603  ) (*InternalKey, base.LazyValue) {
   604  	if i.vState != nil {
   605  		// Might have to fix upper bound since virtual sstable bounds are not
   606  		// known to callers of SeekLT.
   607  		//
   608  		// TODO(bananabrick): We can optimize away this check for the level iter
   609  		// if necessary.
   610  		cmp := i.cmp(key, i.upper)
   611  		// key == i.upper is fine. We'll do the right thing and return the
   612  		// first internal key with user key < key.
   613  		if cmp > 0 {
   614  			return i.virtualLast()
   615  		}
   616  	}
   617  
   618  	i.exhaustedBounds = 0
   619  	i.err = nil // clear cached iteration error
   620  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   621  	i.boundsCmp = 0
   622  
   623  	var result loadBlockResult
   624  	var ikey *InternalKey
   625  	// NB: Unlike SeekGE, we don't have a fast-path here since we don't know
   626  	// whether the topLevelIndex is positioned after the position that would
   627  	// be returned by doing i.topLevelIndex.SeekGE(). To know this we would
   628  	// need to know the index key preceding the current one.
   629  	// NB: If a bound-limited block property filter is configured, it's
   630  	// externally ensured that the filter is disabled (through returning
   631  	// Intersects=false irrespective of the block props provided) during seeks.
   632  	i.maybeFilteredKeysTwoLevel = false
   633  	if ikey, _ = i.topLevelIndex.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
        		// No top-level entry has a separator >= key, so start from the last
        		// index block instead.
   634  		if ikey, _ = i.topLevelIndex.Last(); ikey == nil {
   635  			i.data.invalidate()
   636  			i.index.invalidate()
   637  			return nil, base.LazyValue{}
   638  		}
   639  
   640  		result = i.loadIndex(-1)
   641  		if result == loadBlockFailed {
   642  			return nil, base.LazyValue{}
   643  		}
   644  		if result == loadBlockOK {
   645  			if ikey, val := i.singleLevelIterator.lastInternal(); ikey != nil {
   646  				return i.maybeVerifyKey(ikey, val)
   647  			}
   648  			// Fall through to skipBackward since the singleLevelIterator did
   649  			// not have any blocks that satisfy the block interval
   650  			// constraints, or the lower bound was reached.
   651  		}
   652  		// Else loadBlockIrrelevant, so fall through.
   653  	} else {
   654  		result = i.loadIndex(-1)
   655  		if result == loadBlockFailed {
   656  			return nil, base.LazyValue{}
   657  		}
   658  		if result == loadBlockOK {
   659  			if ikey, val := i.singleLevelIterator.SeekLT(key, flags); ikey != nil {
   660  				return i.maybeVerifyKey(ikey, val)
   661  			}
   662  			// Fall through to skipBackward since the singleLevelIterator did
   663  			// not have any blocks that satisfy the block interval
   664  			// constraint, or the lower bound was reached.
   665  		}
   666  		// Else loadBlockIrrelevant, so fall through.
   667  	}
   668  	if result == loadBlockIrrelevant {
   669  		// Enforce the lower bound here since we don't want to bother moving to
   670  		// the previous entry in the top level index if lower bound is already
   671  		// exceeded. Note that the previous entry starts with keys <=
   672  		// ikey.UserKey since even though this is the current block's
   673  		// separator, the same user key can span multiple index blocks.
   674  		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
   675  			i.exhaustedBounds = -1
   676  		}
   677  	}
   678  	// NB: skipBackward checks whether exhaustedBounds is already -1.
   679  	return i.skipBackward()
   680  }
   681  
   682  // First implements internalIterator.First, as documented in the pebble
   683  // package. Note that First only checks the upper bound. It is up to the caller
   684  // to ensure that key is greater than or equal to the lower bound (e.g. via a
   685  // call to SeekGE(lower)).
   686  func (i *twoLevelIterator) First() (*InternalKey, base.LazyValue) {
   687  	// If the iterator was created on a virtual sstable, we will SeekGE to the
   688  	// lower bound instead of using First, because First does not respect
   689  	// bounds.
   690  	if i.vState != nil {
   691  		return i.SeekGE(i.lower, base.SeekGEFlagsNone)
   692  	}
   693  
   694  	if i.lower != nil {
   695  		panic("twoLevelIterator.First() used despite lower bound")
   696  	}
   697  	i.exhaustedBounds = 0
   698  	i.maybeFilteredKeysTwoLevel = false
   699  	i.err = nil // clear cached iteration error
   700  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   701  	i.boundsCmp = 0
   702  
   703  	var ikey *InternalKey
   704  	if ikey, _ = i.topLevelIndex.First(); ikey == nil {
   705  		return nil, base.LazyValue{}
   706  	}
   707  
   708  	result := i.loadIndex(+1)
   709  	if result == loadBlockFailed {
   710  		return nil, base.LazyValue{}
   711  	}
   712  	if result == loadBlockOK {
   713  		if ikey, val := i.singleLevelIterator.First(); ikey != nil {
   714  			return ikey, val
   715  		}
   716  		// Else fall through to skipForward.
   717  	} else {
   718  		// result == loadBlockIrrelevant. Enforce the upper bound here since
   719  		// don't want to bother moving to the next entry in the top level
   720  		// index if upper bound is already exceeded. Note that the next entry
   721  		// starts with keys >= ikey.UserKey since even though this is the
   722  		// block separator, the same user key can span multiple index blocks.
   723  		// If upper is exclusive we use >= below, else we use >.
   724  		if i.upper != nil {
   725  			cmp := i.cmp(ikey.UserKey, i.upper)
   726  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   727  				i.exhaustedBounds = +1
   728  			}
   729  		}
   730  	}
   731  	// NB: skipForward checks whether exhaustedBounds is already +1.
   732  	return i.skipForward()
   733  }
   734  
   735  // Last implements internalIterator.Last, as documented in the pebble
   736  // package. Note that Last only checks the lower bound. It is up to the caller
   737  // to ensure that key is less than the upper bound (e.g. via a call to
   738  // SeekLT(upper))
   739  func (i *twoLevelIterator) Last() (*InternalKey, base.LazyValue) {
   740  	if i.vState != nil {
   741  		if i.endKeyInclusive {
   742  			return i.virtualLast()
   743  		}
   744  		return i.SeekLT(i.upper, base.SeekLTFlagsNone)
   745  	}
   746  
   747  	if i.upper != nil {
   748  		panic("twoLevelIterator.Last() used despite upper bound")
   749  	}
   750  	i.exhaustedBounds = 0
   751  	i.maybeFilteredKeysTwoLevel = false
   752  	i.err = nil // clear cached iteration error
   753  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   754  	i.boundsCmp = 0
   755  
   756  	var ikey *InternalKey
   757  	if ikey, _ = i.topLevelIndex.Last(); ikey == nil {
   758  		return nil, base.LazyValue{}
   759  	}
   760  
   761  	result := i.loadIndex(-1)
   762  	if result == loadBlockFailed {
   763  		return nil, base.LazyValue{}
   764  	}
   765  	if result == loadBlockOK {
   766  		if ikey, val := i.singleLevelIterator.Last(); ikey != nil {
   767  			return ikey, val
   768  		}
   769  		// Else fall through to skipBackward.
   770  	} else {
   771  		// result == loadBlockIrrelevant. Enforce the lower bound here
   772  		// since don't want to bother moving to the previous entry in the
   773  		// top level index if lower bound is already exceeded. Note that
   774  		// the previous entry starts with keys <= ikey.UserKey since even
   775  		// though this is the current block's separator, the same user key
   776  		// can span multiple index blocks.
   777  		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
   778  			i.exhaustedBounds = -1
   779  		}
   780  	}
   781  	// NB: skipBackward checks whether exhaustedBounds is already -1.
   782  	return i.skipBackward()
   783  }
   784  
   785  // Next implements internalIterator.Next, as documented in the pebble
   786  // package.
   787  // Note: twoLevelCompactionIterator.Next mirrors the implementation of
   788  // twoLevelIterator.Next due to performance. Keep the two in sync.
   789  func (i *twoLevelIterator) Next() (*InternalKey, base.LazyValue) {
   790  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   791  	i.boundsCmp = 0
   792  	i.maybeFilteredKeysTwoLevel = false
   793  	if i.err != nil {
   794  		// TODO(jackson): Can this case be turned into a panic? Once an error is
   795  		// encountered, the iterator must be re-seeked.
   796  		return nil, base.LazyValue{}
   797  	}
   798  	if key, val := i.singleLevelIterator.Next(); key != nil {
   799  		return key, val
   800  	}
   801  	return i.skipForward()
   802  }
   803  
   804  // NextPrefix implements (base.InternalIterator).NextPrefix.
   805  func (i *twoLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
   806  	if i.exhaustedBounds == +1 {
   807  		panic("Next called even though exhausted upper bound")
   808  	}
   809  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   810  	i.boundsCmp = 0
   811  	i.maybeFilteredKeysTwoLevel = false
   812  	if i.err != nil {
   813  		// TODO(jackson): Can this case be turned into a panic? Once an error is
   814  		// encountered, the iterator must be re-seeked.
   815  		return nil, base.LazyValue{}
   816  	}
   817  	if key, val := i.singleLevelIterator.NextPrefix(succKey); key != nil {
   818  		return key, val
   819  	}
   820  	// key == nil
   821  	if i.err != nil {
   822  		return nil, base.LazyValue{}
   823  	}
   824  
   825  	// Did not find prefix in the existing second-level index block. This is the
   826  	// slow-path where we seek the iterator.
   827  	var ikey *InternalKey
   828  	if ikey, _ = i.topLevelIndex.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
   829  		i.data.invalidate()
   830  		i.index.invalidate()
   831  		return nil, base.LazyValue{}
   832  	}
   833  	result := i.loadIndex(+1)
   834  	if result == loadBlockFailed {
   835  		return nil, base.LazyValue{}
   836  	}
   837  	if result == loadBlockIrrelevant {
   838  		// Enforce the upper bound here since don't want to bother moving to the
   839  		// next entry in the top level index if upper bound is already exceeded.
   840  		// Note that the next entry starts with keys >= ikey.UserKey since even
   841  		// though this is the block separator, the same user key can span multiple
   842  		// index blocks. If upper is exclusive we use >= below, else we use >.
   843  		if i.upper != nil {
   844  			cmp := i.cmp(ikey.UserKey, i.upper)
   845  			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   846  				i.exhaustedBounds = +1
   847  			}
   848  		}
   849  	} else if key, val := i.singleLevelIterator.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
   850  		return i.maybeVerifyKey(key, val)
   851  	}
   852  	return i.skipForward()
   853  }
   854  
   855  // Prev implements internalIterator.Prev, as documented in the pebble
   856  // package.
   857  func (i *twoLevelIterator) Prev() (*InternalKey, base.LazyValue) {
   858  	// Seek optimization only applies until iterator is first positioned after SetBounds.
   859  	i.boundsCmp = 0
   860  	i.maybeFilteredKeysTwoLevel = false
   861  	if i.err != nil {
   862  		return nil, base.LazyValue{}
   863  	}
   864  	if key, val := i.singleLevelIterator.Prev(); key != nil {
   865  		return key, val
   866  	}
   867  	return i.skipBackward()
   868  }
   869  
   870  func (i *twoLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
   871  	for {
   872  		if i.err != nil || i.exhaustedBounds > 0 {
   873  			return nil, base.LazyValue{}
   874  		}
   875  		i.exhaustedBounds = 0
   876  		var ikey *InternalKey
   877  		if ikey, _ = i.topLevelIndex.Next(); ikey == nil {
   878  			i.data.invalidate()
   879  			i.index.invalidate()
   880  			return nil, base.LazyValue{}
   881  		}
   882  		result := i.loadIndex(+1)
   883  		if result == loadBlockFailed {
   884  			return nil, base.LazyValue{}
   885  		}
   886  		if result == loadBlockOK {
   887  			if ikey, val := i.singleLevelIterator.firstInternal(); ikey != nil {
   888  				return i.maybeVerifyKey(ikey, val)
   889  			}
   890  			// Next iteration will return if singleLevelIterator set
   891  			// exhaustedBounds = +1.
   892  		} else {
   893  			// result == loadBlockIrrelevant. Enforce the upper bound here
   894  			// since don't want to bother moving to the next entry in the top
   895  			// level index if upper bound is already exceeded. Note that the
   896  			// next entry starts with keys >= ikey.UserKey since even though
   897  			// this is the block separator, the same user key can span
   898  			// multiple index blocks. If upper is exclusive we use >=
   899  			// below, else we use >.
   900  			if i.upper != nil {
   901  				cmp := i.cmp(ikey.UserKey, i.upper)
   902  				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
   903  					i.exhaustedBounds = +1
   904  					// Next iteration will return.
   905  				}
   906  			}
   907  		}
   908  	}
   909  }
   910  
   911  func (i *twoLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
   912  	for {
   913  		if i.err != nil || i.exhaustedBounds < 0 {
   914  			return nil, base.LazyValue{}
   915  		}
   916  		i.exhaustedBounds = 0
   917  		var ikey *InternalKey
   918  		if ikey, _ = i.topLevelIndex.Prev(); ikey == nil {
   919  			i.data.invalidate()
   920  			i.index.invalidate()
   921  			return nil, base.LazyValue{}
   922  		}
   923  		result := i.loadIndex(-1)
   924  		if result == loadBlockFailed {
   925  			return nil, base.LazyValue{}
   926  		}
   927  		if result == loadBlockOK {
   928  			if ikey, val := i.singleLevelIterator.lastInternal(); ikey != nil {
   929  				return i.maybeVerifyKey(ikey, val)
   930  			}
   931  			// Next iteration will return if singleLevelIterator set
   932  			// exhaustedBounds = -1.
   933  		} else {
   934  			// result == loadBlockIrrelevant. Enforce the lower bound here
   935  			// since don't want to bother moving to the previous entry in the
   936  			// top level index if lower bound is already exceeded. Note that
   937  			// the previous entry starts with keys <= ikey.UserKey since even
   938  			// though this is the current block's separator, the same user key
   939  			// can span multiple index blocks.
   940  			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
   941  				i.exhaustedBounds = -1
   942  				// Next iteration will return.
   943  			}
   944  		}
   945  	}
   946  }
   947  
   948  // Close implements internalIterator.Close, as documented in the pebble
   949  // package.
   950  func (i *twoLevelIterator) Close() error {
   951  	i.iterStats.close()
   952  	var err error
   953  	if i.closeHook != nil {
   954  		err = firstError(err, i.closeHook(i))
   955  	}
   956  	err = firstError(err, i.data.Close())
   957  	err = firstError(err, i.index.Close())
   958  	err = firstError(err, i.topLevelIndex.Close())
   959  	if i.dataRH != nil {
   960  		err = firstError(err, i.dataRH.Close())
   961  		i.dataRH = nil
   962  	}
   963  	err = firstError(err, i.err)
   964  	if i.bpfs != nil {
   965  		releaseBlockPropertiesFilterer(i.bpfs)
   966  	}
   967  	if i.vbReader != nil {
   968  		i.vbReader.close()
   969  	}
   970  	if i.vbRH != nil {
   971  		err = firstError(err, i.vbRH.Close())
   972  		i.vbRH = nil
   973  	}
   974  	*i = twoLevelIterator{
   975  		singleLevelIterator: i.singleLevelIterator.resetForReuse(),
   976  		topLevelIndex:       i.topLevelIndex.resetForReuse(),
   977  	}
   978  	twoLevelIterPool.Put(i)
   979  	return err
   980  }
   981  
   982  // Note: twoLevelCompactionIterator and compactionIterator are very similar but
   983  // were separated due to performance.
   984  type twoLevelCompactionIterator struct {
   985  	*twoLevelIterator
   986  	bytesIterated *uint64
   987  	prevOffset    uint64
   988  }
   989  
   990  // twoLevelCompactionIterator implements the base.InternalIterator interface.
   991  var _ base.InternalIterator = (*twoLevelCompactionIterator)(nil)
   992  
   993  func (i *twoLevelCompactionIterator) Close() error {
   994  	return i.twoLevelIterator.Close()
   995  }
   996  
   997  func (i *twoLevelCompactionIterator) SeekGE(
   998  	key []byte, flags base.SeekGEFlags,
   999  ) (*InternalKey, base.LazyValue) {
  1000  	panic("pebble: SeekGE unimplemented")
  1001  }
  1002  
  1003  func (i *twoLevelCompactionIterator) SeekPrefixGE(
  1004  	prefix, key []byte, flags base.SeekGEFlags,
  1005  ) (*base.InternalKey, base.LazyValue) {
  1006  	panic("pebble: SeekPrefixGE unimplemented")
  1007  }
  1008  
  1009  func (i *twoLevelCompactionIterator) SeekLT(
  1010  	key []byte, flags base.SeekLTFlags,
  1011  ) (*InternalKey, base.LazyValue) {
  1012  	panic("pebble: SeekLT unimplemented")
  1013  }
  1014  
  1015  func (i *twoLevelCompactionIterator) First() (*InternalKey, base.LazyValue) {
  1016  	i.err = nil // clear cached iteration error
  1017  	return i.skipForward(i.twoLevelIterator.First())
  1018  }
  1019  
  1020  func (i *twoLevelCompactionIterator) Last() (*InternalKey, base.LazyValue) {
  1021  	panic("pebble: Last unimplemented")
  1022  }
  1023  
  1024  // Note: twoLevelCompactionIterator.Next mirrors the implementation of
  1025  // twoLevelIterator.Next due to performance. Keep the two in sync.
  1026  func (i *twoLevelCompactionIterator) Next() (*InternalKey, base.LazyValue) {
  1027  	if i.err != nil {
  1028  		return nil, base.LazyValue{}
  1029  	}
  1030  	return i.skipForward(i.singleLevelIterator.Next())
  1031  }
  1032  
  1033  func (i *twoLevelCompactionIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
  1034  	panic("pebble: NextPrefix unimplemented")
  1035  }
  1036  
  1037  func (i *twoLevelCompactionIterator) Prev() (*InternalKey, base.LazyValue) {
  1038  	panic("pebble: Prev unimplemented")
  1039  }
  1040  
  1041  func (i *twoLevelCompactionIterator) String() string {
  1042  	if i.vState != nil {
  1043  		return i.vState.fileNum.String()
  1044  	}
  1045  	return i.reader.fileNum.String()
  1046  }
  1047  
  1048  func (i *twoLevelCompactionIterator) skipForward(
  1049  	key *InternalKey, val base.LazyValue,
  1050  ) (*InternalKey, base.LazyValue) {
  1051  	if key == nil {
  1052  		for {
  1053  			if key, _ := i.topLevelIndex.Next(); key == nil {
  1054  				break
  1055  			}
  1056  			result := i.loadIndex(+1)
  1057  			if result != loadBlockOK {
  1058  				if i.err != nil {
  1059  					break
  1060  				}
  1061  				switch result {
  1062  				case loadBlockFailed:
  1063  					// We checked that i.index was at a valid entry, so
  1064  					// loadBlockFailed could not have happened due to to i.index
  1065  					// being exhausted, and must be due to an error.
  1066  					panic("loadBlock should not have failed with no error")
  1067  				case loadBlockIrrelevant:
  1068  					panic("compactionIter should not be using block intervals for skipping")
  1069  				default:
  1070  					panic(fmt.Sprintf("unexpected case %d", result))
  1071  				}
  1072  			}
  1073  			// result == loadBlockOK
  1074  			if key, val = i.singleLevelIterator.First(); key != nil {
  1075  				break
  1076  			}
  1077  		}
  1078  	}
  1079  
  1080  	curOffset := i.recordOffset()
  1081  	*i.bytesIterated += uint64(curOffset - i.prevOffset)
  1082  	i.prevOffset = curOffset
  1083  
  1084  	if i.vState != nil && key != nil {
  1085  		cmp := i.cmp(key.UserKey, i.vState.upper.UserKey)
  1086  		if cmp > 0 || (i.vState.upper.IsExclusiveSentinel() && cmp == 0) {
  1087  			return nil, base.LazyValue{}
  1088  		}
  1089  	}
  1090  
  1091  	return key, val
  1092  }