github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/range_keys.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"context"
     9  
    10  	"github.com/cockroachdb/errors"
    11  	"github.com/cockroachdb/pebble/internal/base"
    12  	"github.com/cockroachdb/pebble/internal/invariants"
    13  	"github.com/cockroachdb/pebble/internal/keyspan"
    14  	"github.com/cockroachdb/pebble/internal/manifest"
    15  	"github.com/cockroachdb/pebble/sstable"
    16  )
    17  
    18  // constructRangeKeyIter constructs the range-key iterator stack, populating
    19  // i.rangeKey.rangeKeyIter with the resulting iterator.
    20  func (i *Iterator) constructRangeKeyIter() {
    21  	i.rangeKey.rangeKeyIter = i.rangeKey.iterConfig.Init(
    22  		&i.comparer, i.seqNum, i.opts.LowerBound, i.opts.UpperBound,
    23  		&i.hasPrefix, &i.prefixOrFullSeekKey, false /* internalKeys */, &i.rangeKey.rangeKeyBuffers.internal)
    24  
    25  	// If there's an indexed batch with range keys, include it.
    26  	if i.batch != nil {
    27  		if i.batch.index == nil {
    28  			// This isn't an indexed batch. We shouldn't have gotten this far.
    29  			panic(errors.AssertionFailedf("creating an iterator over an unindexed batch"))
    30  		} else {
    31  			// Only include the batch's range key iterator if it has any keys.
    32  			// NB: This can force reconstruction of the rangekey iterator stack
    33  			// in SetOptions if subsequently range keys are added. See
    34  			// SetOptions.
    35  			if i.batch.countRangeKeys > 0 {
    36  				i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, i.batchSeqNum)
    37  				i.rangeKey.iterConfig.AddLevel(&i.batchRangeKeyIter)
    38  			}
    39  		}
    40  	}
    41  
    42  	if !i.batchOnlyIter {
    43  		// Next are the flushables: memtables and large batches.
    44  		if i.readState != nil {
    45  			for j := len(i.readState.memtables) - 1; j >= 0; j-- {
    46  				mem := i.readState.memtables[j]
    47  				// We only need to read from memtables which contain sequence numbers older
    48  				// than seqNum.
    49  				if logSeqNum := mem.logSeqNum; logSeqNum >= i.seqNum {
    50  					continue
    51  				}
    52  				if rki := mem.newRangeKeyIter(&i.opts); rki != nil {
    53  					i.rangeKey.iterConfig.AddLevel(rki)
    54  				}
    55  			}
    56  		}
    57  
    58  		current := i.version
    59  		if current == nil {
    60  			current = i.readState.current
    61  		}
    62  		// Next are the file levels: L0 sub-levels followed by lower levels.
    63  
    64  		// Add file-specific iterators for L0 files containing range keys. We
    65  		// maintain a separate manifest.LevelMetadata for each level containing only
    66  		// files that contain range keys, however we don't compute a separate
    67  		// L0Sublevels data structure too.
    68  		//
    69  		// We first use L0's LevelMetadata to peek and see whether L0 contains any
    70  		// range keys at all. If it does, we create a range key level iterator per
    71  		// level that contains range keys using the information from L0Sublevels.
    72  		// Some sublevels may not contain any range keys, and we need to iterate
    73  		// through the fileMetadata to determine that. Since L0's file count should
    74  		// not significantly exceed ~1000 files (see L0CompactionFileThreshold),
    75  		// this should be okay.
    76  		if !current.RangeKeyLevels[0].Empty() {
    77  			// L0 contains at least 1 file containing range keys.
    78  			// Add level iterators for the L0 sublevels, iterating from newest to
    79  			// oldest.
    80  			for j := len(current.L0SublevelFiles) - 1; j >= 0; j-- {
    81  				iter := current.L0SublevelFiles[j].Iter()
    82  				if !containsAnyRangeKeys(iter) {
    83  					continue
    84  				}
    85  
    86  				li := i.rangeKey.iterConfig.NewLevelIter()
    87  				li.Init(
    88  					i.opts.SpanIterOptions(),
    89  					i.cmp,
    90  					i.newIterRangeKey,
    91  					iter.Filter(manifest.KeyTypeRange),
    92  					manifest.L0Sublevel(j),
    93  					manifest.KeyTypeRange,
    94  				)
    95  				i.rangeKey.iterConfig.AddLevel(li)
    96  			}
    97  		}
    98  
    99  		// Add level iterators for the non-empty non-L0 levels.
   100  		for level := 1; level < len(current.RangeKeyLevels); level++ {
   101  			if current.RangeKeyLevels[level].Empty() {
   102  				continue
   103  			}
   104  			li := i.rangeKey.iterConfig.NewLevelIter()
   105  			spanIterOpts := i.opts.SpanIterOptions()
   106  			li.Init(spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(),
   107  				manifest.Level(level), manifest.KeyTypeRange)
   108  			i.rangeKey.iterConfig.AddLevel(li)
   109  		}
   110  	}
   111  }
   112  
   113  func containsAnyRangeKeys(iter manifest.LevelIterator) bool {
   114  	for f := iter.First(); f != nil; f = iter.Next() {
   115  		if f.HasRangeKeys {
   116  			return true
   117  		}
   118  	}
   119  	return false
   120  }
   121  
   122  // Range key masking
   123  //
   124  // Pebble iterators may be configured such that range keys with suffixes mask
   125  // point keys with lower suffixes. The intended use is implementing a MVCC
   126  // delete range operation using range keys, when suffixes are MVCC timestamps.
   127  //
   128  // To enable masking, the user populates the IterOptions's RangeKeyMasking
   129  // field. The Suffix field configures which range keys act as masks. The
   130  // intended use is to hold a MVCC read timestamp. When implementing a MVCC
   131  // delete range operation, only range keys that are visible at the read
   132  // timestamp should be visible. If a range key has a suffix ≤
   133  // RangeKeyMasking.Suffix, it acts as a mask.
   134  //
   135  // Range key masking is facilitated by the keyspan.InterleavingIter. The
   136  // interleaving iterator interleaves range keys and point keys during combined
   137  // iteration. During user iteration, the interleaving iterator is configured
   138  // with a keyspan.SpanMask, implemented by the rangeKeyMasking struct below.
   139  // The SpanMask interface defines two methods: SpanChanged and SkipPoint.
   140  //
   141  // SpanChanged is used to keep the current mask up-to-date. Whenever the point
   142  // iterator has stepped into or out of the bounds of a range key, the
   143  // interleaving iterator invokes SpanChanged passing the current covering range
   144  // key. The below rangeKeyMasking implementation scans the range keys looking
   145  // for the range key with the largest suffix that's still ≤ the suffix supplied
   146  // to IterOptions.RangeKeyMasking.Suffix (the "read timestamp"). If it finds a
   147  // range key that meets the condition, the range key should act as a mask. The
   148  // span and the relevant range key's suffix are saved.
   149  //
   150  // The above ensures that `rangeKeyMasking.maskActiveSuffix` always contains the
   151  // current masking suffix such that any point keys with lower suffixes should be
   152  // skipped.
   153  //
   154  // There are two ways in which masked point keys are skipped.
   155  //
   156  //   1. Interleaving iterator SkipPoint
   157  //
   158  // Whenever the interleaving iterator encounters a point key that falls within
   159  // the bounds of a range key, it invokes SkipPoint. The interleaving iterator
   160  // guarantees that the SpanChanged method described above has already been
   161  // invoked with the covering range key. The below rangeKeyMasking implementation
   162  // of SkipPoint splits the key into prefix and suffix, compares the suffix to
   163  // the `maskActiveSuffix` updated by SpanChanged and returns true if
   164  // suffix(point) < maskActiveSuffix.
   165  //
   166  // The SkipPoint logic is sufficient to ensure that the Pebble iterator filters
   167  // out all masked point keys. However, it requires the iterator read each masked
   168  // point key. For broad range keys that mask many points, this may be expensive.
   169  //
   170  //   2. Block property filter
   171  //
   172  // For more efficient handling of broad range keys that mask many points, the
   173  // IterOptions.RangeKeyMasking field has an optional Filter option. This Filter
   174  // field takes a superset of the block-property filter interface, adding a
   175  // method to dynamically configure the filter's filtering criteria.
   176  //
   177  // To make use of the Filter option, the user is required to define and
   178  // configure a block-property collector that collects a property containing at
   179  // least the maximum suffix of a key within a block.
   180  //
   181  // When the SpanChanged method described above is invoked, rangeKeyMasking also
   182  // reconfigures the user-provided filter. It invokes a SetSuffix method,
   183  // providing the `maskActiveSuffix`, requesting that from now on the
   184  // block-property filter return Intersects()=false for any properties indicating
   185  // that a block contains exclusively keys with suffixes greater than the
   186  // provided suffix.
   187  //
   188  // Note that unlike other block-property filters, the filter used for masking
   189  // must not apply across the entire keyspace. It must only filter blocks that
   190  // lie within the bounds of the range key that set the mask suffix. To
   191  // accommodate this, rangeKeyMasking implements a special interface:
   192  // sstable.BoundLimitedBlockPropertyFilter. This interface extends the block
   193  // property filter interface with two new methods: KeyIsWithinLowerBound and
   194  // KeyIsWithinUpperBound. The rangeKeyMasking type wraps the user-provided block
   195  // property filter, implementing these two methods and overriding Intersects to
   196  // always return true if there is no active mask.
   197  //
   198  // The logic to ensure that a mask block-property filter is only applied within
   199  // the bounds of the masking range key is subtle. The interleaving iterator
   200  // guarantees that it never invokes SpanChanged until the point iterator is
   201  // positioned within the range key. During forward iteration, this guarantees
   202  // that any block that a sstable reader might attempt to load contains only keys
   203  // greater than or equal to the range key's lower bound. During backward
   204  // iteration, it provides the analogous guarantee on the range key's upper
   205  // bound.
   206  //
   207  // The above ensures that an sstable reader only needs to verify that a block
   208  // that it skips meets the opposite bound. This is where the
   209  // KeyIsWithinLowerBound and KeyIsWithinUpperBound methods are used. When an
   210  // sstable iterator is configured with a BoundLimitedBlockPropertyFilter, it
   211  // checks for intersection with the block-property filter before every block
   212  // load, like ordinary block-property filters. However, if the bound-limited
   213  // block property filter indicates that it does NOT intersect, the filter's
   214  // relevant KeyIsWithin{Lower,Upper}Bound method is queried, using a block
   215  // index separator as the bound. If the method indicates that the provided index
   216  // separator does not fall within the range key bounds, the no-intersection
   217  // result is ignored, and the block is read.
   218  
// rangeKeyMasking holds the state used to mask point keys with range keys. It
// is installed as the interleaving iterator's keyspan.SpanMask (SpanChanged,
// SkipPoint) and also implements sstable.BoundLimitedBlockPropertyFilter,
// wrapping the user-provided block-property filter.
//
// cmp and split are the functions handed to init: cmp orders suffixes and
// span bounds, and split locates the start of a point key's suffix. filter is
// constructed from IterOptions.RangeKeyMasking.Filter and is left unset when
// no Filter is configured. parent is the Iterator whose options, stats, error
// and range-key staleness flag the mask reads and updates.
type rangeKeyMasking struct {
	cmp    base.Compare
	split  base.Split
	filter BlockPropertyFilterMask
	// maskActiveSuffix holds the suffix of a range key currently acting as a
	// mask, hiding point keys with suffixes that compare greater than it under
	// cmp. maskActiveSuffix is only ever non-empty if
	// IterOptions.RangeKeyMasking.Suffix is non-nil. maskActiveSuffix is
	// updated whenever the iterator passes over a new range key. The
	// maskActiveSuffix should only be used if maskSpan is non-nil.
	//
	// See SpanChanged.
	maskActiveSuffix []byte
	// maskSpan holds the span from which the active mask suffix was extracted.
	// The span is used for bounds comparisons, to ensure that a range-key mask
	// is not applied beyond the bounds of the range key.
	maskSpan *keyspan.Span
	parent   *Iterator
}
   237  
   238  func (m *rangeKeyMasking) init(parent *Iterator, cmp base.Compare, split base.Split) {
   239  	m.cmp = cmp
   240  	m.split = split
   241  	if parent.opts.RangeKeyMasking.Filter != nil {
   242  		m.filter = parent.opts.RangeKeyMasking.Filter()
   243  	}
   244  	m.parent = parent
   245  }
   246  
   247  // SpanChanged implements the keyspan.SpanMask interface, used during range key
   248  // iteration.
   249  func (m *rangeKeyMasking) SpanChanged(s *keyspan.Span) {
   250  	if s == nil && m.maskSpan == nil {
   251  		return
   252  	}
   253  	m.maskSpan = nil
   254  	m.maskActiveSuffix = m.maskActiveSuffix[:0]
   255  
   256  	// Find the smallest suffix of a range key contained within the Span,
   257  	// excluding suffixes less than m.opts.RangeKeyMasking.Suffix.
   258  	if s != nil {
   259  		m.parent.rangeKey.stale = true
   260  		if m.parent.opts.RangeKeyMasking.Suffix != nil {
   261  			for j := range s.Keys {
   262  				if s.Keys[j].Suffix == nil {
   263  					continue
   264  				}
   265  				if m.cmp(s.Keys[j].Suffix, m.parent.opts.RangeKeyMasking.Suffix) < 0 {
   266  					continue
   267  				}
   268  				if len(m.maskActiveSuffix) == 0 || m.cmp(m.maskActiveSuffix, s.Keys[j].Suffix) > 0 {
   269  					m.maskSpan = s
   270  					m.maskActiveSuffix = append(m.maskActiveSuffix[:0], s.Keys[j].Suffix...)
   271  				}
   272  			}
   273  		}
   274  	}
   275  
   276  	if m.maskSpan != nil && m.parent.opts.RangeKeyMasking.Filter != nil {
   277  		// Update the  block-property filter to filter point keys with suffixes
   278  		// greater than m.maskActiveSuffix.
   279  		err := m.filter.SetSuffix(m.maskActiveSuffix)
   280  		if err != nil {
   281  			m.parent.err = err
   282  		}
   283  	}
   284  	// If no span is active, we leave the inner block-property filter configured
   285  	// with its existing suffix. That's okay, because Intersects calls are first
   286  	// evaluated by iteratorRangeKeyState.Intersects, which considers all blocks
   287  	// as intersecting if there's no active mask.
   288  }
   289  
   290  // SkipPoint implements the keyspan.SpanMask interface, used during range key
   291  // iteration. Whenever a point key is covered by a non-empty Span, the
   292  // interleaving iterator invokes SkipPoint. This function is responsible for
   293  // performing range key masking.
   294  //
   295  // If a non-nil IterOptions.RangeKeyMasking.Suffix is set, range key masking is
   296  // enabled. Masking hides point keys, transparently skipping over the keys.
   297  // Whether or not a point key is masked is determined by comparing the point
   298  // key's suffix, the overlapping span's keys' suffixes, and the user-configured
   299  // IterOption's RangeKeyMasking.Suffix. When configured with a masking threshold
   300  // _t_, and there exists a span with suffix _r_ covering a point key with suffix
   301  // _p_, and
   302  //
   303  //	_t_ ≤ _r_ < _p_
   304  //
   305  // then the point key is elided. Consider the following rendering, where using
   306  // integer suffixes with higher integers sort before suffixes with lower
   307  // integers, (for example @7 ≤ @6 < @5):
   308  //
   309  //	     ^
   310  //	  @9 |        •―――――――――――――――○ [e,m)@9
   311  //	s  8 |                      • l@8
   312  //	u  7 |------------------------------------ @7 RangeKeyMasking.Suffix
   313  //	f  6 |      [h,q)@6 •―――――――――――――――――○            (threshold)
   314  //	f  5 |              • h@5
   315  //	f  4 |                          • n@4
   316  //	i  3 |          •―――――――――――○ [f,l)@3
   317  //	x  2 |  • b@2
   318  //	   1 |
   319  //	   0 |___________________________________
   320  //	      a b c d e f g h i j k l m n o p q
   321  //
   322  // An iterator scanning the entire keyspace with the masking threshold set to @7
   323  // will observe point keys b@2 and l@8. The span keys [h,q)@6 and [f,l)@3 serve
   324  // as masks, because cmp(@6,@7) ≥ 0 and cmp(@3,@7) ≥ 0. The span key [e,m)@9
   325  // does not serve as a mask, because cmp(@9,@7) < 0.
   326  //
   327  // Although point l@8 falls within the user key bounds of [e,m)@9, [e,m)@9 is
   328  // non-masking due to its suffix. The point key l@8 also falls within the user
   329  // key bounds of [h,q)@6, but since cmp(@6,@8) ≥ 0, l@8 is unmasked.
   330  //
   331  // Invariant: The userKey is within the user key bounds of the span most
   332  // recently provided to `SpanChanged`.
   333  func (m *rangeKeyMasking) SkipPoint(userKey []byte) bool {
   334  	m.parent.stats.RangeKeyStats.ContainedPoints++
   335  	if m.maskSpan == nil {
   336  		// No range key is currently acting as a mask, so don't skip.
   337  		return false
   338  	}
   339  	// Range key masking is enabled and the current span includes a range key
   340  	// that is being used as a mask. (NB: SpanChanged already verified that the
   341  	// range key's suffix is ≥ RangeKeyMasking.Suffix).
   342  	//
   343  	// This point key falls within the bounds of the range key (guaranteed by
   344  	// the InterleavingIter). Skip the point key if the range key's suffix is
   345  	// greater than the point key's suffix.
   346  	pointSuffix := userKey[m.split(userKey):]
   347  	if len(pointSuffix) > 0 && m.cmp(m.maskActiveSuffix, pointSuffix) < 0 {
   348  		m.parent.stats.RangeKeyStats.SkippedPoints++
   349  		return true
   350  	}
   351  	return false
   352  }
   353  
   354  // The rangeKeyMasking type implements the sstable package's
   355  // BoundLimitedBlockPropertyFilter interface in order to use block property
   356  // filters for range key masking. The rangeKeyMasking implementation wraps
   357  // the block-property filter provided in IterOptions.RangeKeyMasking.Filter.
   358  //
   359  // Using a block-property filter for range-key masking requires limiting the
   360  // filter's effect to the bounds of the range key currently acting as a mask.
   361  // Consider the range key [a,m)@10, and an iterator positioned just before the
   362  // below block, bounded by index separators `c` and `z`:
   363  //
   364  //	          c                          z
   365  //	   x      |  c@9 c@5 c@1 d@7 e@4 y@4 | ...
   366  //	iter pos
   367  //
   368  // The next block cannot be skipped, even though the range key suffix @10 is
   369  // greater than all the block's keys' suffixes, because it contains a key (y@4)
   370  // outside the bounds of the range key.
   371  //
   372  // This extended BoundLimitedBlockPropertyFilter interface adds two new methods,
   373  // KeyIsWithinLowerBound and KeyIsWithinUpperBound, for testing whether a
   374  // particular block is within bounds.
   375  //
   376  // The rangeKeyMasking type implements these new methods by comparing the
   377  // provided key against the corresponding bound of the span currently acting
   378  // as a mask. Both methods rely on the invariant that a mask is active
   379  // (m.maskSpan != nil) when they are called.
   380  var _ sstable.BoundLimitedBlockPropertyFilter = (*rangeKeyMasking)(nil)
   381  
// Name implements the sstable.BoundLimitedBlockPropertyFilter interface by
// passing through to the user-defined block property filter. m.filter is only
// populated by init when a RangeKeyMasking.Filter is configured.
func (m *rangeKeyMasking) Name() string {
	return m.filter.Name()
}
   387  
   388  // Intersects implements the limitedBlockPropertyFilter interface defined in the
   389  // sstable package by passing the intersection decision to the user-provided
   390  // block property filter only if a range key is covering the current iterator
   391  // position.
   392  func (m *rangeKeyMasking) Intersects(prop []byte) (bool, error) {
   393  	if m.maskSpan == nil {
   394  		// No span is actively masking.
   395  		return true, nil
   396  	}
   397  	return m.filter.Intersects(prop)
   398  }
   399  
   400  // KeyIsWithinLowerBound implements the limitedBlockPropertyFilter interface
   401  // defined in the sstable package. It's used to restrict the masking block
   402  // property filter to only applying within the bounds of the active range key.
   403  func (m *rangeKeyMasking) KeyIsWithinLowerBound(key []byte) bool {
   404  	// Invariant: m.maskSpan != nil
   405  	//
   406  	// The provided `key` is an inclusive lower bound of the block we're
   407  	// considering skipping.
   408  	return m.cmp(m.maskSpan.Start, key) <= 0
   409  }
   410  
   411  // KeyIsWithinUpperBound implements the limitedBlockPropertyFilter interface
   412  // defined in the sstable package. It's used to restrict the masking block
   413  // property filter to only applying within the bounds of the active range key.
   414  func (m *rangeKeyMasking) KeyIsWithinUpperBound(key []byte) bool {
   415  	// Invariant: m.maskSpan != nil
   416  	//
   417  	// The provided `key` is an *inclusive* upper bound of the block we're
   418  	// considering skipping, so the range key's end must be strictly greater
   419  	// than the block bound for the block to be within bounds.
   420  	return m.cmp(m.maskSpan.End, key) > 0
   421  }
   422  
// lazyCombinedIter implements the internalIterator interface, wrapping a
// pointIter. It requires that the pointIter's levelIters be configured with
// pointers to its combinedIterState. When a levelIter observes a file
// containing a range key, the lazyCombinedIter constructs the combined
// range+point key iterator stack and switches to it.
//
// Until combinedIterState.initialized is set, every method operates on
// pointIter; afterwards every method delegates to the parent's interleaving
// iterator (parent.rangeKey.iiter).
type lazyCombinedIter struct {
	// parent holds a pointer to the root *pebble.Iterator containing this
	// iterator. It's used to mutate the internalIterator in use when switching
	// to combined iteration.
	parent            *Iterator
	pointIter         internalIterator
	combinedIterState combinedIterState
}
   436  
// combinedIterState encapsulates the current state of combined iteration.
// Various low-level iterators (mergingIter, leveliter) hold pointers to the
// *pebble.Iterator's combinedIterState. This allows them to check whether or
// not they must monitor for files containing range keys (!initialized), or not.
//
// When !initialized, low-level iterators watch for files containing range keys.
// When one is discovered, they set triggered=true and key to the smallest
// (forward direction) or largest (reverse direction) range key that's been
// observed.
//
// lazyCombinedIter checks triggered after each point-iterator positioning
// operation and, if it is set, calls initCombinedIteration, which sets
// initialized=true and clears key.
type combinedIterState struct {
	// key holds the smallest (forward direction) or largest (backward
	// direction) user key from a range key bound discovered during the iterator
	// operation that triggered the switch to combined iteration.
	//
	// Slices stored here must be stable. This is possible because callers pass
	// a Smallest/Largest bound from a fileMetadata, which are immutable. A key
	// slice's bytes must not be overwritten.
	key         []byte
	triggered   bool
	initialized bool
}
   458  
   459  // Assert that *lazyCombinedIter implements internalIterator.
   460  var _ internalIterator = (*lazyCombinedIter)(nil)
   461  
   462  // initCombinedIteration is invoked after a pointIter positioning operation
   463  // resulted in i.combinedIterState.triggered=true.
   464  //
   465  // The `dir` parameter is `+1` or `-1` indicating forward iteration or backward
   466  // iteration respectively.
   467  //
   468  // The `pointKey` and `pointValue` parameters provide the new point key-value
   469  // pair that the iterator was just positioned to. The combined iterator should
   470  // be seeded with this point key-value pair and return the smaller (forward
   471  // iteration) or larger (backward iteration) of the two.
   472  //
   473  // The `seekKey` parameter is non-nil only if the iterator operation that
   474  // triggered the switch to combined iteration was a SeekGE, SeekPrefixGE or
   475  // SeekLT. It provides the seek key supplied and is used to seek the range-key
   476  // iterator using the same key. This is necessary for SeekGE/SeekPrefixGE
   477  // operations that land in the middle of a range key and must truncate to the
   478  // user-provided seek key.
   479  func (i *lazyCombinedIter) initCombinedIteration(
   480  	dir int8, pointKey *InternalKey, pointValue base.LazyValue, seekKey []byte,
   481  ) (*InternalKey, base.LazyValue) {
   482  	// Invariant: i.parent.rangeKey is nil.
   483  	// Invariant: !i.combinedIterState.initialized.
   484  	if invariants.Enabled {
   485  		if i.combinedIterState.initialized {
   486  			panic("pebble: combined iterator already initialized")
   487  		}
   488  		if i.parent.rangeKey != nil {
   489  			panic("pebble: iterator already has a range-key iterator stack")
   490  		}
   491  	}
   492  
   493  	// We need to determine the key to seek the range key iterator to. If
   494  	// seekKey is not nil, the user-initiated operation that triggered the
   495  	// switch to combined iteration was itself a seek, and we can use that key.
   496  	// Otherwise, a First/Last or relative positioning operation triggered the
   497  	// switch to combined iteration.
   498  	//
   499  	// The levelIter that observed a file containing range keys populated
   500  	// combinedIterState.key with the smallest (forward) or largest (backward)
   501  	// range key it observed. If multiple levelIters observed files with range
   502  	// keys during the same operation on the mergingIter, combinedIterState.key
   503  	// is the smallest [during forward iteration; largest in reverse iteration]
   504  	// such key.
   505  	if seekKey == nil {
   506  		// Use the levelIter-populated key.
   507  		seekKey = i.combinedIterState.key
   508  
   509  		// We may need to adjust the levelIter-populated seek key to the
   510  		// surfaced point key. If the key observed is beyond [in the iteration
   511  		// direction] the current point key, there may still exist a range key
   512  		// at an earlier key. Consider the following example:
   513  		//
   514  		//   L5:  000003:[bar.DEL.5, foo.RANGEKEYSET.9]
   515  		//   L6:  000001:[bar.SET.2] 000002:[bax.RANGEKEYSET.8]
   516  		//
   517  		// A call to First() seeks the levels to files L5.000003 and L6.000001.
   518  		// The L5 levelIter observes that L5.000003 contains the range key with
   519  		// start key `foo`, and triggers a switch to combined iteration, setting
   520  		// `combinedIterState.key` = `foo`.
   521  		//
   522  		// The L6 levelIter did not observe the true first range key
   523  		// (bax.RANGEKEYSET.8), because it appears in a later sstable. When the
   524  		// combined iterator is initialized, the range key iterator must be
   525  		// seeked to a key that will find `bax`. To accomplish this, we seek the
   526  		// key instead to `bar`. It is guaranteed that no range key exists
   527  		// earlier than `bar`, otherwise a levelIter would've observed it and
   528  		// set `combinedIterState.key` to its start key.
   529  		if pointKey != nil {
   530  			if dir == +1 && i.parent.cmp(i.combinedIterState.key, pointKey.UserKey) > 0 {
   531  				seekKey = pointKey.UserKey
   532  			} else if dir == -1 && i.parent.cmp(seekKey, pointKey.UserKey) < 0 {
   533  				seekKey = pointKey.UserKey
   534  			}
   535  		}
   536  	}
   537  
   538  	// An operation on the point iterator observed a file containing range keys,
   539  	// so we must switch to combined interleaving iteration. First, construct
   540  	// the range key iterator stack. It must not exist, otherwise we'd already
   541  	// be performing combined iteration.
   542  	i.parent.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState)
   543  	i.parent.rangeKey.init(i.parent.comparer.Compare, i.parent.comparer.Split, &i.parent.opts)
   544  	i.parent.constructRangeKeyIter()
   545  
   546  	// Initialize the Iterator's interleaving iterator.
   547  	i.parent.rangeKey.iiter.Init(
   548  		&i.parent.comparer, i.parent.pointIter, i.parent.rangeKey.rangeKeyIter,
   549  		keyspan.InterleavingIterOpts{
   550  			Mask:       &i.parent.rangeKeyMasking,
   551  			LowerBound: i.parent.opts.LowerBound,
   552  			UpperBound: i.parent.opts.UpperBound,
   553  		})
   554  
   555  	// Set the parent's primary iterator to point to the combined, interleaving
   556  	// iterator that's now initialized with our current state.
   557  	i.parent.iter = &i.parent.rangeKey.iiter
   558  	i.combinedIterState.initialized = true
   559  	i.combinedIterState.key = nil
   560  
   561  	// All future iterator operations will go directly through the combined
   562  	// iterator.
   563  	//
   564  	// Initialize the interleaving iterator. We pass the point key-value pair so
   565  	// that the interleaving iterator knows where the point iterator is
   566  	// positioned. Additionally, we pass the seek key to which the range-key
   567  	// iterator should be seeked in order to initialize its position.
   568  	//
   569  	// In the forward direction (invert for backwards), the seek key is a key
   570  	// guaranteed to find the smallest range key that's greater than the last
   571  	// key the iterator returned. The range key may be less than pointKey, in
   572  	// which case the range key will be interleaved next instead of the point
   573  	// key.
   574  	if dir == +1 {
   575  		var prefix []byte
   576  		if i.parent.hasPrefix {
   577  			prefix = i.parent.prefixOrFullSeekKey
   578  		}
   579  		return i.parent.rangeKey.iiter.InitSeekGE(prefix, seekKey, pointKey, pointValue)
   580  	}
   581  	return i.parent.rangeKey.iiter.InitSeekLT(seekKey, pointKey, pointValue)
   582  }
   583  
   584  func (i *lazyCombinedIter) SeekGE(
   585  	key []byte, flags base.SeekGEFlags,
   586  ) (*InternalKey, base.LazyValue) {
   587  	if i.combinedIterState.initialized {
   588  		return i.parent.rangeKey.iiter.SeekGE(key, flags)
   589  	}
   590  	k, v := i.pointIter.SeekGE(key, flags)
   591  	if i.combinedIterState.triggered {
   592  		return i.initCombinedIteration(+1, k, v, key)
   593  	}
   594  	return k, v
   595  }
   596  
   597  func (i *lazyCombinedIter) SeekPrefixGE(
   598  	prefix, key []byte, flags base.SeekGEFlags,
   599  ) (*InternalKey, base.LazyValue) {
   600  	if i.combinedIterState.initialized {
   601  		return i.parent.rangeKey.iiter.SeekPrefixGE(prefix, key, flags)
   602  	}
   603  	k, v := i.pointIter.SeekPrefixGE(prefix, key, flags)
   604  	if i.combinedIterState.triggered {
   605  		return i.initCombinedIteration(+1, k, v, key)
   606  	}
   607  	return k, v
   608  }
   609  
   610  func (i *lazyCombinedIter) SeekLT(
   611  	key []byte, flags base.SeekLTFlags,
   612  ) (*InternalKey, base.LazyValue) {
   613  	if i.combinedIterState.initialized {
   614  		return i.parent.rangeKey.iiter.SeekLT(key, flags)
   615  	}
   616  	k, v := i.pointIter.SeekLT(key, flags)
   617  	if i.combinedIterState.triggered {
   618  		return i.initCombinedIteration(-1, k, v, key)
   619  	}
   620  	return k, v
   621  }
   622  
   623  func (i *lazyCombinedIter) First() (*InternalKey, base.LazyValue) {
   624  	if i.combinedIterState.initialized {
   625  		return i.parent.rangeKey.iiter.First()
   626  	}
   627  	k, v := i.pointIter.First()
   628  	if i.combinedIterState.triggered {
   629  		return i.initCombinedIteration(+1, k, v, nil)
   630  	}
   631  	return k, v
   632  }
   633  
   634  func (i *lazyCombinedIter) Last() (*InternalKey, base.LazyValue) {
   635  	if i.combinedIterState.initialized {
   636  		return i.parent.rangeKey.iiter.Last()
   637  	}
   638  	k, v := i.pointIter.Last()
   639  	if i.combinedIterState.triggered {
   640  		return i.initCombinedIteration(-1, k, v, nil)
   641  	}
   642  	return k, v
   643  }
   644  
   645  func (i *lazyCombinedIter) Next() (*InternalKey, base.LazyValue) {
   646  	if i.combinedIterState.initialized {
   647  		return i.parent.rangeKey.iiter.Next()
   648  	}
   649  	k, v := i.pointIter.Next()
   650  	if i.combinedIterState.triggered {
   651  		return i.initCombinedIteration(+1, k, v, nil)
   652  	}
   653  	return k, v
   654  }
   655  
   656  func (i *lazyCombinedIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
   657  	if i.combinedIterState.initialized {
   658  		return i.parent.rangeKey.iiter.NextPrefix(succKey)
   659  	}
   660  	k, v := i.pointIter.NextPrefix(succKey)
   661  	if i.combinedIterState.triggered {
   662  		return i.initCombinedIteration(+1, k, v, nil)
   663  	}
   664  	return k, v
   665  }
   666  
   667  func (i *lazyCombinedIter) Prev() (*InternalKey, base.LazyValue) {
   668  	if i.combinedIterState.initialized {
   669  		return i.parent.rangeKey.iiter.Prev()
   670  	}
   671  	k, v := i.pointIter.Prev()
   672  	if i.combinedIterState.triggered {
   673  		return i.initCombinedIteration(-1, k, v, nil)
   674  	}
   675  	return k, v
   676  }
   677  
   678  func (i *lazyCombinedIter) Error() error {
   679  	if i.combinedIterState.initialized {
   680  		return i.parent.rangeKey.iiter.Error()
   681  	}
   682  	return i.pointIter.Error()
   683  }
   684  
   685  func (i *lazyCombinedIter) Close() error {
   686  	if i.combinedIterState.initialized {
   687  		return i.parent.rangeKey.iiter.Close()
   688  	}
   689  	return i.pointIter.Close()
   690  }
   691  
   692  func (i *lazyCombinedIter) SetBounds(lower, upper []byte) {
   693  	if i.combinedIterState.initialized {
   694  		i.parent.rangeKey.iiter.SetBounds(lower, upper)
   695  		return
   696  	}
   697  	i.pointIter.SetBounds(lower, upper)
   698  }
   699  
   700  func (i *lazyCombinedIter) SetContext(ctx context.Context) {
   701  	if i.combinedIterState.initialized {
   702  		i.parent.rangeKey.iiter.SetContext(ctx)
   703  		return
   704  	}
   705  	i.pointIter.SetContext(ctx)
   706  }
   707  
   708  func (i *lazyCombinedIter) String() string {
   709  	if i.combinedIterState.initialized {
   710  		return i.parent.rangeKey.iiter.String()
   711  	}
   712  	return i.pointIter.String()
   713  }