github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/reader_iter_two_lvl.go

// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"context"
	"fmt"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
)

type twoLevelIterator struct {
	singleLevelIterator
	// maybeFilteredKeysTwoLevel indicates whether the last iterator
	// positioning operation may have skipped any index blocks due to
	// block-property filters when positioning the top-level index.
	maybeFilteredKeysTwoLevel bool
	topLevelIndex             blockIter
}
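
// A two-level index arranges an sstable's index as a small tree: the
// top-level index block holds one entry per second-level index block, and
// each second-level index block holds one entry per data block.
// Schematically (an illustrative sketch, not the on-disk encoding):
//
//	topLevelIndex: [sep1 -> indexBlock1] ... [sepN -> indexBlockN]
//	indexBlockK:   [sepK1 -> dataBlockK1] ... [sepKM -> dataBlockKM]
//
// i.topLevelIndex iterates over the top-level entries, i.index (embedded via
// singleLevelIterator) iterates over the current second-level index block,
// and i.data iterates over the current data block.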

// twoLevelIterator implements the base.InternalIterator interface.
var _ base.InternalIterator = (*twoLevelIterator)(nil)

// loadIndex loads the index block at the current top level index position and
// leaves i.index unpositioned. If unsuccessful, it sets i.err to any error
// encountered, which may be nil if we have simply exhausted the entire table.
// This is used for two level indexes.
func (i *twoLevelIterator) loadIndex(dir int8) loadBlockResult {
	// Ensure the index and data block iterators are invalidated even if
	// loading of the index fails.
	i.data.invalidate()
	i.index.invalidate()
	if !i.topLevelIndex.valid() {
		i.index.offset = 0
		i.index.restarts = 0
		return loadBlockFailed
	}
	v := i.topLevelIndex.value()
	bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
	if err != nil {
		i.err = base.CorruptionErrorf("pebble/table: corrupt top level index entry")
		return loadBlockFailed
	}
	if i.bpfs != nil {
		intersects, err := i.bpfs.intersects(bhp.Props)
		if err != nil {
			i.err = errCorruptIndexEntry
			return loadBlockFailed
		}
		if intersects == blockMaybeExcluded {
			intersects = i.resolveMaybeExcluded(dir)
		}
		if intersects == blockExcluded {
			i.maybeFilteredKeysTwoLevel = true
			return loadBlockIrrelevant
		}
		// blockIntersects
	}
	ctx := objiotracing.WithBlockType(i.ctx, objiotracing.MetadataBlock)
	indexBlock, err := i.reader.readBlock(ctx, bhp.BlockHandle, nil /* transform */, nil /* readHandle */, i.stats, i.bufferPool)
	if err != nil {
		i.err = err
		return loadBlockFailed
	}
	if i.err = i.index.initHandle(i.cmp, indexBlock, i.reader.Properties.GlobalSeqNum, false); i.err == nil {
		return loadBlockOK
	}
	return loadBlockFailed
}

// resolveMaybeExcluded is invoked when the block-property filterer has found
// that an index block is excluded according to its properties but only if its
// bounds fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or `blockExcluded`.
func (i *twoLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
	// This iterator is configured with a bound-limited block property filter.
	// The bpf determined this entire index block could be excluded from
	// iteration based on the property encoded in the block handle. However, we
	// still need to determine if the index block is wholly contained within the
	// filter's key bounds.
	//
	// External guarantees ensure all its data blocks' keys are ≥ the filter's
	// lower bound during forward iteration, and that all its data blocks' keys
	// are < the filter's upper bound during backward iteration. We only need to
	// determine if the opposite bound is also met.
	//
	// The index separator in topLevelIndex.Key() provides an inclusive
	// upper-bound for the index block's keys, guaranteeing that all its keys
	// are ≤ topLevelIndex.Key(). For forward iteration, this is all we need.
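	//
	// For example (with hypothetical keys): given filter bounds [b,d) during
	// forward iteration and an index block with separator `c`, the external
	// guarantee says all the block's keys are ≥ b, and the separator says
	// they are ≤ c. If KeyIsWithinUpperBound(`c`) reports that `c` falls
	// within the filter's upper bound, the block lies wholly inside [b,d)
	// and can be excluded.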
	if dir > 0 {
		// Forward iteration.
		if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.topLevelIndex.Key().UserKey) {
			return blockExcluded
		}
		return blockIntersects
	}

	// Reverse iteration.
	//
	// Because we're iterating in the reverse direction, we don't yet have
	// enough context available to determine if the block is wholly contained
	// within its bounds. This case arises only during backward iteration,
	// because of the way the index is structured.
	//
	// Consider a bound-limited bpf limited to the bounds [b,d), loading the
	// block with separator `c`. During reverse iteration, the guarantee that
	// all the block's keys are < `d` is externally provided, but no guarantee
	// is made on the bpf's lower bound. The separator `c` only provides an
	// inclusive upper bound on the block's keys, indicating that the
	// corresponding block handle points to a block containing only keys ≤ `c`.
	//
	// To establish a lower bound, we step the top-level index backwards to read
	// the previous block's separator, which provides an inclusive lower bound
	// on the original index block's keys. Afterwards, we step forward to
	// restore our top-level index position.
	if peekKey, _ := i.topLevelIndex.Prev(); peekKey == nil {
		// The original block points to the first index block of this table. If
		// we knew the lower bound for the entire table, it could provide a
		// lower bound, but the code refactoring necessary to read it doesn't
		// seem worth the payoff. We fall through to loading the block.
	} else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
		// The lower-bound on the original index block falls within the filter's
		// bounds, and we can skip the block (after restoring our current
		// top-level index position).
		_, _ = i.topLevelIndex.Next()
		return blockExcluded
	}
	_, _ = i.topLevelIndex.Next()
	return blockIntersects
}

// Note that the lower, upper bounds passed into init have nothing to do with
// virtual sstable bounds. If the virtualState passed in is not nil, then
// virtual sstable bounds will be enforced.
func (i *twoLevelIterator) init(
	ctx context.Context,
	r *Reader,
	v *virtualState,
	lower, upper []byte,
	filterer *BlockPropertiesFilterer,
	useFilter, hideObsoletePoints bool,
	stats *base.InternalIteratorStats,
	rp ReaderProvider,
	bufferPool *BufferPool,
) error {
	if r.err != nil {
		return r.err
	}
	topLevelIndexH, err := r.readIndex(ctx, stats)
	if err != nil {
		return err
	}
	if v != nil {
		i.vState = v
		// Note that upper is exclusive here.
		i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
	}

	i.ctx = ctx
	i.lower = lower
	i.upper = upper
	i.bpfs = filterer
	i.useFilter = useFilter
	i.reader = r
	i.cmp = r.Compare
	i.stats = stats
	i.hideObsoletePoints = hideObsoletePoints
	i.bufferPool = bufferPool
	err = i.topLevelIndex.initHandle(i.cmp, topLevelIndexH, r.Properties.GlobalSeqNum, false)
	if err != nil {
		// blockIter.Close releases topLevelIndexH and always returns a nil error.
		_ = i.topLevelIndex.Close()
		return err
	}
	i.dataRH = r.readable.NewReadHandle(ctx)
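	// For TableFormatPebblev3 and later, values may be stored separately in
	// value blocks, in which case data blocks hold value handles that the
	// valueBlockReader dereferences on demand when a caller retrieves the
	// lazy value.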
	if r.tableFormat >= TableFormatPebblev3 {
		if r.Properties.NumValueBlocks > 0 {
			i.vbReader = &valueBlockReader{
				ctx:    ctx,
				bpOpen: i,
				rp:     rp,
				vbih:   r.valueBIH,
				stats:  stats,
			}
			i.data.lazyValueHandling.vbr = i.vbReader
			i.vbRH = r.readable.NewReadHandle(ctx)
		}
		i.data.lazyValueHandling.hasValuePrefix = true
	}
	return nil
}

func (i *twoLevelIterator) String() string {
	if i.vState != nil {
		return i.vState.fileNum.String()
	}
	return i.reader.fileNum.String()
}

// MaybeFilteredKeys may be called when an iterator is exhausted to indicate
// whether or not the last positioning method may have skipped any keys due to
// block-property filters.
func (i *twoLevelIterator) MaybeFilteredKeys() bool {
	// While reading sstables with two-level indexes, knowledge of whether we've
	// filtered keys is tracked separately for each index level. The
	// seek-using-next optimizations have different criteria. We can only reset
	// maybeFilteredKeys back to false during a seek when NOT using the
	// fast-path that uses the current iterator position.
	//
	// If either level might have filtered keys to arrive at the current
	// iterator position, return MaybeFilteredKeys=true.
	return i.maybeFilteredKeysTwoLevel || i.maybeFilteredKeysSingleLevel
}
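
// For example, if an earlier SeekGE skipped an excluded index block via a
// block-property filter before becoming exhausted, a caller cannot treat the
// exhaustion as proof that no keys exist within its bounds; it can call
// MaybeFilteredKeys to distinguish "no keys" from "keys possibly filtered".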

// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package. Note that SeekGE only checks the upper bound. It is up to the
// caller to ensure that key is greater than or equal to the lower bound.
func (i *twoLevelIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Callers of SeekGE don't know about virtual sstable bounds, so we may
		// have to internally restrict the bounds.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		if i.cmp(key, i.lower) < 0 {
			key = i.lower
		}
	}

	err := i.err
	i.err = nil // clear cached iteration error

	// The twoLevelIterator could already be exhausted. Utilize that when
	// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
	// bounds-exhausted near the top of the file.
	if flags.TrySeekUsingNext() &&
		(i.exhaustedBounds == +1 || (i.data.isDataInvalidated() && i.index.isDataInvalidated())) &&
		err == nil {
		// Already exhausted, so return nil.
		return nil, base.LazyValue{}
	}

	// SeekGE performs various step-instead-of-seeking optimizations: e.g.
	// enabled by trySeekUsingNext, or by monotonically increasing bounds
	// (i.boundsCmp). Care must be taken to ensure that when performing these
	// optimizations and the iterator becomes exhausted,
	// i.maybeFilteredKeysTwoLevel is set appropriately. Consider a previous
	// SeekGE that filtered keys from k until the current iterator position.
	//
	// If the previous SeekGE exhausted the iterator while seeking within the
	// two-level index, it's possible keys greater than or equal to the current
	// search key were filtered through skipped index blocks. We must not reuse
	// the position of the two-level index iterator without remembering the
	// previous value of maybeFilteredKeysTwoLevel.

	// We fall into the slow path if i.index.isDataInvalidated() even if the
	// top-level iterator is already positioned correctly and all other
	// conditions are met. An alternative structure could reuse topLevelIndex's
	// current position and reload the index block to which it points. Arguably,
	// an index block load is expensive and the index block may still be earlier
	// than the index block containing the sought key, resulting in a wasteful
	// block load.

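	// Restated, the slow path below is taken when any of the following holds:
	// the top-level or second-level index data is invalidated, there is a
	// cached error, neither the monotonic-bounds (i.boundsCmp > 0) nor the
	// trySeekUsingNext optimization applies, or the sought key is beyond the
	// current top-level separator.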
	var dontSeekWithinSingleLevelIter bool
	if i.topLevelIndex.isDataInvalidated() || !i.topLevelIndex.valid() || i.index.isDataInvalidated() || err != nil ||
		(i.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.cmp(key, i.topLevelIndex.Key().UserKey) > 0 {
		// Slow-path: need to position the topLevelIndex.

		// The previous exhausted state of singleLevelIterator is no longer
		// relevant, since we may be moving to a different index block.
		i.exhaustedBounds = 0
		i.maybeFilteredKeysTwoLevel = false
		flags = flags.DisableTrySeekUsingNext()
		var ikey *InternalKey
		if ikey, _ = i.topLevelIndex.SeekGE(key, flags); ikey == nil {
			i.data.invalidate()
			i.index.invalidate()
			return nil, base.LazyValue{}
		}

		result := i.loadIndex(+1)
		if result == loadBlockFailed {
			i.boundsCmp = 0
			return nil, base.LazyValue{}
		}
		if result == loadBlockIrrelevant {
			// Enforce the upper bound here, since we don't want to bother
			// moving to the next entry in the top-level index if the upper
			// bound is already exceeded. Note that the next entry starts with
			// keys >= ikey.UserKey since even though this is the block
			// separator, the same user key can span multiple index blocks. If
			// upper is exclusive we use >= below, else we use >.
			if i.upper != nil {
				cmp := i.cmp(ikey.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
				}
			}
			// Fall through to skipForward.
			dontSeekWithinSingleLevelIter = true
			// Clear boundsCmp.
			//
			// In the typical cases where dontSeekWithinSingleLevelIter=false,
			// the singleLevelIterator.SeekGE call will clear boundsCmp.
			// However, in this case where dontSeekWithinSingleLevelIter=true,
			// we never seek on the single-level iterator. This call will fall
			// through to skipForward, which may improperly leave boundsCmp=+1
			// unless we clear it here.
			i.boundsCmp = 0
		}
	} else {
		// INVARIANT: err == nil.
		//
		// Else fast-path: There are two possible cases, from
		// (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
		//
		// 1) The bounds have moved forward (i.boundsCmp > 0) and this SeekGE is
		// respecting the lower bound (guaranteed by Iterator). We know that the
		// iterator must already be positioned within or just outside the previous
		// bounds. Therefore, the topLevelIndex iter cannot be positioned at an
		// entry ahead of the seek position (though it can be positioned behind).
		// The check that i.cmp(key, i.topLevelIndex.Key().UserKey) <= 0 confirms
		// that it is not behind. Since it is not ahead and not behind it must be
		// at the right position.
		//
		// 2) This SeekGE will land on a key that is greater than the key we are
		// currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
		// i.topLevelIndex.Key().UserKey) <= 0, we are at the correct lower level
		// index block. No need to reset the state of singleLevelIterator.
		//
		// Note that cases 1 and 2 never overlap, and one of them must be true,
		// but we have some test code (TestIterRandomizedMaybeFilteredKeys) that
		// sets both to true, so we fix things here and then do an invariant
		// check.
		//
		// This invariant checking is important enough that we do not gate it
		// behind invariants.Enabled.
		if i.boundsCmp > 0 {
			// TODO(sumeer): fix TestIterRandomizedMaybeFilteredKeys so as to not
			// need this behavior.
			flags = flags.DisableTrySeekUsingNext()
		}
		if i.boundsCmp > 0 == flags.TrySeekUsingNext() {
			panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
				i.boundsCmp > 0, flags.TrySeekUsingNext()))
		}

		if !flags.TrySeekUsingNext() {
			// Case 1. Bounds have changed so the previous exhausted bounds state is
			// irrelevant.
			// WARNING-data-exhausted: this is safe to do only because the monotonic
			// bounds optimizations only work when !data-exhausted. If they also
			// worked with data-exhausted, we would have made it unclear whether
			// data-exhausted is actually true. See the comment at the top of the
			// file.
			i.exhaustedBounds = 0
		}
		// Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
		// preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
		// bug https://github.com/cockroachdb/pebble/issues/2036.
	}

	if !dontSeekWithinSingleLevelIter {
		// Note that while trySeekUsingNext could be false here, singleLevelIterator
		// could do its own boundsCmp-based optimization to seek using next.
		if ikey, val := i.singleLevelIterator.SeekGE(key, flags); ikey != nil {
			return ikey, val
		}
	}
	return i.skipForward()
}

// SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
// pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
// to the caller to ensure that key is greater than or equal to the lower bound.
func (i *twoLevelIterator) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Callers of SeekPrefixGE don't know about virtual sstable bounds, so
		// we may have to internally restrict the bounds.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		if i.cmp(key, i.lower) < 0 {
			key = i.lower
		}
	}

	// NOTE: prefix is only used for bloom filter checking and not later work in
	// this method. Hence, we can use the existing iterator position if the last
	// SeekPrefixGE did not fail bloom filter matching.

	err := i.err
	i.err = nil // clear cached iteration error

	// The twoLevelIterator could already be exhausted. Utilize that when
	// trySeekUsingNext is true. See the comment about data-exhausted, PGDE, and
	// bounds-exhausted near the top of the file.
	filterUsedAndDidNotMatch :=
		i.reader.tableFilter != nil && i.useFilter && !i.lastBloomFilterMatched
	if flags.TrySeekUsingNext() && !filterUsedAndDidNotMatch &&
		(i.exhaustedBounds == +1 || (i.data.isDataInvalidated() && i.index.isDataInvalidated())) &&
		err == nil {
		// Already exhausted, so return nil.
		return nil, base.LazyValue{}
	}

	// Check prefix bloom filter.
	if i.reader.tableFilter != nil && i.useFilter {
		if !i.lastBloomFilterMatched {
			// Iterator is not positioned based on last seek.
			flags = flags.DisableTrySeekUsingNext()
		}
		i.lastBloomFilterMatched = false
		var dataH bufferHandle
		dataH, i.err = i.reader.readFilter(i.ctx, i.stats)
		if i.err != nil {
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
		dataH.Release()
		if !mayContain {
			// This invalidation may not be necessary for correctness, and may
			// be a place to optimize later by reusing the already loaded
			// block. It was necessary in earlier versions of the code since
			// the caller was allowed to call Next when SeekPrefixGE returned
			// nil. This is no longer allowed.
			i.data.invalidate()
			return nil, base.LazyValue{}
		}
		i.lastBloomFilterMatched = true
	}

	// Bloom filter matches.

	// SeekPrefixGE performs various step-instead-of-seeking optimizations: e.g.
	// enabled by trySeekUsingNext, or by monotonically increasing bounds
	// (i.boundsCmp). Care must be taken to ensure that when performing these
	// optimizations and the iterator becomes exhausted,
	// i.maybeFilteredKeysTwoLevel is set appropriately. Consider a previous
	// SeekPrefixGE that filtered keys from k until the current iterator
	// position.
	//
	// If the previous SeekPrefixGE exhausted the iterator while seeking within
	// the two-level index, it's possible keys greater than or equal to the
	// current search key were filtered through skipped index blocks. We must
	// not reuse the position of the two-level index iterator without
	// remembering the previous value of maybeFilteredKeysTwoLevel.

	// We fall into the slow path if i.index.isDataInvalidated() even if the
	// top-level iterator is already positioned correctly and all other
	// conditions are met. An alternative structure could reuse topLevelIndex's
	// current position and reload the index block to which it points. Arguably,
	// an index block load is expensive and the index block may still be earlier
	// than the index block containing the sought key, resulting in a wasteful
	// block load.

	var dontSeekWithinSingleLevelIter bool
	if i.topLevelIndex.isDataInvalidated() || !i.topLevelIndex.valid() || i.index.isDataInvalidated() || err != nil ||
		(i.boundsCmp <= 0 && !flags.TrySeekUsingNext()) || i.cmp(key, i.topLevelIndex.Key().UserKey) > 0 {
		// Slow-path: need to position the topLevelIndex.

		// The previous exhausted state of singleLevelIterator is no longer
		// relevant, since we may be moving to a different index block.
		i.exhaustedBounds = 0
		i.maybeFilteredKeysTwoLevel = false
		flags = flags.DisableTrySeekUsingNext()
		var ikey *InternalKey
		if ikey, _ = i.topLevelIndex.SeekGE(key, flags); ikey == nil {
			i.data.invalidate()
			i.index.invalidate()
			return nil, base.LazyValue{}
		}

		result := i.loadIndex(+1)
		if result == loadBlockFailed {
			i.boundsCmp = 0
			return nil, base.LazyValue{}
		}
		if result == loadBlockIrrelevant {
			// Enforce the upper bound here, since we don't want to bother
			// moving to the next entry in the top-level index if the upper
			// bound is already exceeded. Note that the next entry starts with
			// keys >= ikey.UserKey since even though this is the block
			// separator, the same user key can span multiple index blocks. If
			// upper is exclusive we use >= below, else we use >.
			if i.upper != nil {
				cmp := i.cmp(ikey.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
				}
			}
			// Fall through to skipForward.
			dontSeekWithinSingleLevelIter = true
			// Clear boundsCmp.
			//
			// In the typical cases where dontSeekWithinSingleLevelIter=false,
			// the singleLevelIterator.SeekPrefixGE call will clear boundsCmp.
			// However, in this case where dontSeekWithinSingleLevelIter=true,
			// we never seek on the single-level iterator. This call will fall
			// through to skipForward, which may improperly leave boundsCmp=+1
			// unless we clear it here.
			i.boundsCmp = 0
		}
	} else {
		// INVARIANT: err == nil.
		//
		// Else fast-path: There are two possible cases, from
		// (i.boundsCmp > 0 || flags.TrySeekUsingNext()):
		//
		// 1) The bounds have moved forward (i.boundsCmp > 0) and this
		// SeekPrefixGE is respecting the lower bound (guaranteed by Iterator). We
		// know that the iterator must already be positioned within or just
		// outside the previous bounds. Therefore, the topLevelIndex iter cannot
		// be positioned at an entry ahead of the seek position (though it can be
		// positioned behind). The check that i.cmp(key,
		// i.topLevelIndex.Key().UserKey) <= 0 confirms that it is not behind.
		// Since it is not ahead and not behind it must be at the right position.
		//
		// 2) This SeekPrefixGE will land on a key that is greater than the key we
		// are currently at (guaranteed by trySeekUsingNext), but since i.cmp(key,
		// i.topLevelIndex.Key().UserKey) <= 0, we are at the correct lower level
		// index block. No need to reset the state of singleLevelIterator.
		//
		// Note that cases 1 and 2 never overlap, and one of them must be true.
		// This invariant checking is important enough that we do not gate it
		// behind invariants.Enabled.
		if i.boundsCmp > 0 == flags.TrySeekUsingNext() {
			panic(fmt.Sprintf("inconsistency in optimization case 1 %t and case 2 %t",
				i.boundsCmp > 0, flags.TrySeekUsingNext()))
		}

		if !flags.TrySeekUsingNext() {
			// Case 1. Bounds have changed so the previous exhausted bounds state is
			// irrelevant.
			// WARNING-data-exhausted: this is safe to do only because the monotonic
			// bounds optimizations only work when !data-exhausted. If they also
			// worked with data-exhausted, we would have made it unclear whether
			// data-exhausted is actually true. See the comment at the top of the
			// file.
			i.exhaustedBounds = 0
		}
		// Else flags.TrySeekUsingNext(). The i.exhaustedBounds is important to
		// preserve for singleLevelIterator, and twoLevelIterator.skipForward. See
		// bug https://github.com/cockroachdb/pebble/issues/2036.
	}

	if !dontSeekWithinSingleLevelIter {
		if ikey, val := i.singleLevelIterator.seekPrefixGE(
			prefix, key, flags, false /* checkFilter */); ikey != nil {
			return ikey, val
		}
	}
	// NB: skipForward checks whether exhaustedBounds is already +1.
	return i.skipForward()
}

// virtualLast should only be called if i.vState != nil.
func (i *twoLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
	if i.vState == nil {
		panic("pebble: invalid call to virtualLast")
	}

	// Seek to the first internal key with user key >= i.upper.
	ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
	if i.endKeyInclusive {
		// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
		// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
		// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
		// the last key with userkey == i.upper.
		//
		// TODO(bananabrick): Think about how to improve this. If there are many
		// internal keys with the same user key at the upper bound then this could
		// be slow, but maybe the odds of having many internal keys with the same
		// user key at the upper bound are low.
		for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
			ikey, _ = i.Next()
		}
		return i.Prev()
	}
	// We sought to the first key >= i.upper; stepping back yields the last key
	// strictly below the exclusive upper bound.
	return i.Prev()
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package. Note that SeekLT only checks the lower bound. It is up to the
// caller to ensure that key is less than the upper bound.
func (i *twoLevelIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		// Might have to fix upper bound since virtual sstable bounds are not
		// known to callers of SeekLT.
		//
		// TODO(bananabrick): We can optimize away this check for the level iter
		// if necessary.
		cmp := i.cmp(key, i.upper)
		// key == i.upper is fine. We'll do the right thing and return the
		// first internal key with user key < key.
		if cmp > 0 {
			return i.virtualLast()
		}
	}

	i.exhaustedBounds = 0
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var result loadBlockResult
	var ikey *InternalKey
	// NB: Unlike SeekGE, we don't have a fast-path here since we don't know
	// whether the topLevelIndex is positioned after the position that would
	// be returned by doing i.topLevelIndex.SeekGE(). To know this we would
	// need to know the index key preceding the current one.
	// NB: If a bound-limited block property filter is configured, it's
	// externally ensured that the filter is disabled (through returning
	// Intersects=false irrespective of the block props provided) during seeks.
	i.maybeFilteredKeysTwoLevel = false
	if ikey, _ = i.topLevelIndex.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
		if ikey, _ = i.topLevelIndex.Last(); ikey == nil {
			i.data.invalidate()
			i.index.invalidate()
			return nil, base.LazyValue{}
		}

		result = i.loadIndex(-1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockOK {
			if ikey, val := i.singleLevelIterator.lastInternal(); ikey != nil {
				return i.maybeVerifyKey(ikey, val)
			}
			// Fall through to skipBackward since the singleLevelIterator did
			// not have any blocks that satisfy the block interval
			// constraints, or the lower bound was reached.
		}
		// Else loadBlockIrrelevant, so fall through.
	} else {
		result = i.loadIndex(-1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockOK {
			if ikey, val := i.singleLevelIterator.SeekLT(key, flags); ikey != nil {
				return i.maybeVerifyKey(ikey, val)
			}
			// Fall through to skipBackward since the singleLevelIterator did
			// not have any blocks that satisfy the block interval
			// constraint, or the lower bound was reached.
		}
		// Else loadBlockIrrelevant, so fall through.
	}
	if result == loadBlockIrrelevant {
		// Enforce the lower bound here, since we don't want to bother moving
		// to the previous entry in the top-level index if the lower bound is
		// already exceeded. Note that the previous entry starts with keys <=
		// ikey.UserKey since even though this is the current block's
		// separator, the same user key can span multiple index blocks.
		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
			i.exhaustedBounds = -1
		}
	}
	// NB: skipBackward checks whether exhaustedBounds is already -1.
	return i.skipBackward()
}

// First implements internalIterator.First, as documented in the pebble
// package. Note that First only checks the upper bound. It is up to the caller
// to ensure that key is greater than or equal to the lower bound (e.g. via a
// call to SeekGE(lower)).
func (i *twoLevelIterator) First() (*InternalKey, base.LazyValue) {
	// If the iterator was created on a virtual sstable, we will SeekGE to the
	// lower bound instead of using First, because First does not respect
	// bounds.
	if i.vState != nil {
		return i.SeekGE(i.lower, base.SeekGEFlagsNone)
	}

	if i.lower != nil {
		panic("twoLevelIterator.First() used despite lower bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysTwoLevel = false
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.topLevelIndex.First(); ikey == nil {
		return nil, base.LazyValue{}
	}

	result := i.loadIndex(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.singleLevelIterator.First(); ikey != nil {
			return ikey, val
		}
		// Else fall through to skipForward.
	} else {
		// result == loadBlockIrrelevant. Enforce the upper bound here, since
		// we don't want to bother moving to the next entry in the top-level
		// index if the upper bound is already exceeded. Note that the next
		// entry starts with keys >= ikey.UserKey since even though this is
		// the block separator, the same user key can span multiple index
		// blocks. If upper is exclusive we use >= below, else we use >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
			}
		}
	}
	// NB: skipForward checks whether exhaustedBounds is already +1.
	return i.skipForward()
}

// Last implements internalIterator.Last, as documented in the pebble
// package. Note that Last only checks the lower bound. It is up to the caller
// to ensure that key is less than the upper bound (e.g. via a call to
// SeekLT(upper)).
func (i *twoLevelIterator) Last() (*InternalKey, base.LazyValue) {
	if i.vState != nil {
		if i.endKeyInclusive {
			return i.virtualLast()
		}
		return i.SeekLT(i.upper, base.SeekLTFlagsNone)
	}

	if i.upper != nil {
		panic("twoLevelIterator.Last() used despite upper bound")
	}
	i.exhaustedBounds = 0
	i.maybeFilteredKeysTwoLevel = false
	i.err = nil // clear cached iteration error
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0

	var ikey *InternalKey
	if ikey, _ = i.topLevelIndex.Last(); ikey == nil {
		return nil, base.LazyValue{}
	}

	result := i.loadIndex(-1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockOK {
		if ikey, val := i.singleLevelIterator.Last(); ikey != nil {
			return ikey, val
		}
		// Else fall through to skipBackward.
	} else {
		// result == loadBlockIrrelevant. Enforce the lower bound here, since
		// we don't want to bother moving to the previous entry in the
		// top-level index if the lower bound is already exceeded. Note that
		// the previous entry starts with keys <= ikey.UserKey since even
		// though this is the current block's separator, the same user key
		// can span multiple index blocks.
		if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
			i.exhaustedBounds = -1
		}
	}
	// NB: skipBackward checks whether exhaustedBounds is already -1.
	return i.skipBackward()
}

// Next implements internalIterator.Next, as documented in the pebble
// package.
// Note: twoLevelCompactionIterator.Next mirrors the implementation of
// twoLevelIterator.Next due to performance. Keep the two in sync.
func (i *twoLevelIterator) Next() (*InternalKey, base.LazyValue) {
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.maybeFilteredKeysTwoLevel = false
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	if key, val := i.singleLevelIterator.Next(); key != nil {
		return key, val
	}
	return i.skipForward()
}

// NextPrefix implements (base.InternalIterator).NextPrefix.
func (i *twoLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	if i.exhaustedBounds == +1 {
		panic("NextPrefix called even though exhausted upper bound")
	}
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.maybeFilteredKeysTwoLevel = false
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	if key, val := i.singleLevelIterator.NextPrefix(succKey); key != nil {
		return key, val
	}
	// key == nil
	if i.err != nil {
		return nil, base.LazyValue{}
	}

	// Did not find prefix in the existing second-level index block. This is the
	// slow-path where we seek the iterator.
	var ikey *InternalKey
	if ikey, _ = i.topLevelIndex.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
		i.data.invalidate()
		i.index.invalidate()
		return nil, base.LazyValue{}
	}
	result := i.loadIndex(+1)
	if result == loadBlockFailed {
		return nil, base.LazyValue{}
	}
	if result == loadBlockIrrelevant {
		// Enforce the upper bound here, since we don't want to bother moving
		// to the next entry in the top-level index if the upper bound is
		// already exceeded. Note that the next entry starts with keys >=
		// ikey.UserKey since even though this is the block separator, the
		// same user key can span multiple index blocks. If upper is exclusive
		// we use >= below, else we use >.
		if i.upper != nil {
			cmp := i.cmp(ikey.UserKey, i.upper)
			if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
				i.exhaustedBounds = +1
			}
		}
	} else if key, val := i.singleLevelIterator.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
		return i.maybeVerifyKey(key, val)
	}
	return i.skipForward()
}

// Prev implements internalIterator.Prev, as documented in the pebble
// package.
func (i *twoLevelIterator) Prev() (*InternalKey, base.LazyValue) {
	// Seek optimization only applies until iterator is first positioned after SetBounds.
	i.boundsCmp = 0
	i.maybeFilteredKeysTwoLevel = false
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	if key, val := i.singleLevelIterator.Prev(); key != nil {
		return key, val
	}
	return i.skipBackward()
}

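// skipForward advances the top-level index until it finds a second-level
// index block that is both relevant (not excluded by block-property filters)
// and yields a key, returning that key. It returns nil once the top-level
// index is exhausted, the upper bound is reached, or an error occurs.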
func (i *twoLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
	for {
		if i.err != nil || i.exhaustedBounds > 0 {
			return nil, base.LazyValue{}
		}
		i.exhaustedBounds = 0
		var ikey *InternalKey
		if ikey, _ = i.topLevelIndex.Next(); ikey == nil {
			i.data.invalidate()
			i.index.invalidate()
			return nil, base.LazyValue{}
		}
		result := i.loadIndex(+1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockOK {
			if ikey, val := i.singleLevelIterator.firstInternal(); ikey != nil {
				return i.maybeVerifyKey(ikey, val)
			}
			// Next iteration will return if singleLevelIterator set
			// exhaustedBounds = +1.
		} else {
			// result == loadBlockIrrelevant. Enforce the upper bound here,
			// since we don't want to bother moving to the next entry in the
			// top-level index if the upper bound is already exceeded. Note
			// that the next entry starts with keys >= ikey.UserKey since even
			// though this is the block separator, the same user key can span
			// multiple index blocks. If upper is exclusive we use >= below,
			// else we use >.
			if i.upper != nil {
				cmp := i.cmp(ikey.UserKey, i.upper)
				if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
					i.exhaustedBounds = +1
					// Next iteration will return.
				}
			}
		}
	}
}

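// skipBackward steps the top-level index backwards until it finds a
// second-level index block that is both relevant (not excluded by
// block-property filters) and yields a key, returning that key. It returns
// nil once the top-level index is exhausted, the lower bound is reached, or
// an error occurs.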
func (i *twoLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
	for {
		if i.err != nil || i.exhaustedBounds < 0 {
			return nil, base.LazyValue{}
		}
		i.exhaustedBounds = 0
		var ikey *InternalKey
		if ikey, _ = i.topLevelIndex.Prev(); ikey == nil {
			i.data.invalidate()
			i.index.invalidate()
			return nil, base.LazyValue{}
		}
		result := i.loadIndex(-1)
		if result == loadBlockFailed {
			return nil, base.LazyValue{}
		}
		if result == loadBlockOK {
			if ikey, val := i.singleLevelIterator.lastInternal(); ikey != nil {
				return i.maybeVerifyKey(ikey, val)
			}
			// Next iteration will return if singleLevelIterator set
			// exhaustedBounds = -1.
		} else {
			// result == loadBlockIrrelevant. Enforce the lower bound here,
			// since we don't want to bother moving to the previous entry in
			// the top-level index if the lower bound is already exceeded.
			// Note that the previous entry starts with keys <= ikey.UserKey
			// since even though this is the current block's separator, the
			// same user key can span multiple index blocks.
			if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
				i.exhaustedBounds = -1
				// Next iteration will return.
			}
		}
	}
}

// Close implements internalIterator.Close, as documented in the pebble
// package.
func (i *twoLevelIterator) Close() error {
	var err error
	if i.closeHook != nil {
		err = firstError(err, i.closeHook(i))
	}
	err = firstError(err, i.data.Close())
	err = firstError(err, i.index.Close())
	err = firstError(err, i.topLevelIndex.Close())
	if i.dataRH != nil {
		err = firstError(err, i.dataRH.Close())
		i.dataRH = nil
	}
	err = firstError(err, i.err)
	if i.bpfs != nil {
		releaseBlockPropertiesFilterer(i.bpfs)
	}
	if i.vbReader != nil {
		i.vbReader.close()
	}
	if i.vbRH != nil {
		err = firstError(err, i.vbRH.Close())
		i.vbRH = nil
	}
	*i = twoLevelIterator{
		singleLevelIterator: i.singleLevelIterator.resetForReuse(),
		topLevelIndex:       i.topLevelIndex.resetForReuse(),
	}
	twoLevelIterPool.Put(i)
	return err
}
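
// Close does not free the iterator for garbage collection; it resets the
// iterator and returns it to twoLevelIterPool so the next iterator
// construction can reuse the allocation. A minimal sketch of the reuse cycle
// (the corresponding Get lives in the Reader's iterator constructors, which
// are not part of this file):
//
//	it := twoLevelIterPool.Get().(*twoLevelIterator)
//	// ... it.init(...), iterate ...
//	_ = it.Close() // resets *it and returns it to the pool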

// Note: twoLevelCompactionIterator and compactionIterator are very similar but
// were separated due to performance.
type twoLevelCompactionIterator struct {
	*twoLevelIterator
	bytesIterated *uint64
	prevOffset    uint64
}

// twoLevelCompactionIterator implements the base.InternalIterator interface.
var _ base.InternalIterator = (*twoLevelCompactionIterator)(nil)

func (i *twoLevelCompactionIterator) Close() error {
	return i.twoLevelIterator.Close()
}

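// Compactions scan the sstable strictly forward via First and Next (and
// NextPrefix is likewise unused), so the remaining positioning methods are
// never invoked and simply panic.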
func (i *twoLevelCompactionIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	panic("pebble: SeekGE unimplemented")
}

func (i *twoLevelCompactionIterator) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	panic("pebble: SeekPrefixGE unimplemented")
}

func (i *twoLevelCompactionIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	panic("pebble: SeekLT unimplemented")
}

func (i *twoLevelCompactionIterator) First() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	return i.skipForward(i.twoLevelIterator.First())
}

func (i *twoLevelCompactionIterator) Last() (*InternalKey, base.LazyValue) {
	panic("pebble: Last unimplemented")
}

// Note: twoLevelCompactionIterator.Next mirrors the implementation of
// twoLevelIterator.Next due to performance. Keep the two in sync.
func (i *twoLevelCompactionIterator) Next() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.skipForward(i.singleLevelIterator.Next())
}

func (i *twoLevelCompactionIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	panic("pebble: NextPrefix unimplemented")
}

func (i *twoLevelCompactionIterator) Prev() (*InternalKey, base.LazyValue) {
	panic("pebble: Prev unimplemented")
}

func (i *twoLevelCompactionIterator) String() string {
	if i.vState != nil {
		return i.vState.fileNum.String()
	}
	return i.reader.fileNum.String()
}

func (i *twoLevelCompactionIterator) skipForward(
	key *InternalKey, val base.LazyValue,
) (*InternalKey, base.LazyValue) {
	if key == nil {
		for {
			if key, _ := i.topLevelIndex.Next(); key == nil {
				break
			}
			result := i.loadIndex(+1)
			if result != loadBlockOK {
				if i.err != nil {
					break
				}
				switch result {
				case loadBlockFailed:
					// We checked that i.topLevelIndex was at a valid entry, so
					// loadBlockFailed could not have happened due to
					// i.topLevelIndex being exhausted, and must be due to an
					// error.
					panic("loadBlock should not have failed with no error")
				case loadBlockIrrelevant:
					panic("compactionIter should not be using block intervals for skipping")
				default:
					panic(fmt.Sprintf("unexpected case %d", result))
				}
			}
			// result == loadBlockOK
			if key, val = i.singleLevelIterator.First(); key != nil {
				break
			}
		}
	}

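	// Account for the bytes consumed since the previous call: recordOffset
	// reports how far iteration has advanced through the file, and the delta
	// from prevOffset is accumulated into *i.bytesIterated.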
	curOffset := i.recordOffset()
	*i.bytesIterated += uint64(curOffset - i.prevOffset)
	i.prevOffset = curOffset

	if i.vState != nil && key != nil {
		cmp := i.cmp(key.UserKey, i.vState.upper.UserKey)
		if cmp > 0 || (i.vState.upper.IsExclusiveSentinel() && cmp == 0) {
			return nil, base.LazyValue{}
		}
	}

	return key, val
}