github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/base/iterator.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package base
     6  
     7  import "fmt"
     8  
     9  // InternalIterator iterates over a DB's key/value pairs in key order. Unlike
    10  // the Iterator interface, the returned keys are InternalKeys composed of the
    11  // user-key, a sequence number and a key kind. In forward iteration, key/value
    12  // pairs for identical user-keys are returned in descending sequence order. In
    13  // reverse iteration, key/value pairs for identical user-keys are returned in
    14  // ascending sequence order.
    15  //
    16  // InternalIterators provide 5 absolute positioning methods and 2 relative
    17  // positioning methods. The absolute positioning methods are:
    18  //
    19  // - SeekGE
    20  // - SeekPrefixGE
    21  // - SeekLT
    22  // - First
    23  // - Last
    24  //
    25  // The relative positioning methods are:
    26  //
    27  // - Next
    28  // - Prev
    29  //
    30  // The relative positioning methods can be used in conjunction with any of the
    31  // absolute positioning methods with one exception: SeekPrefixGE does not
    32  // support reverse iteration via Prev. It is undefined to call relative
    33  // positioning methods without ever calling an absolute positioning method.
    34  //
    35  // InternalIterators can optionally implement a prefix iteration mode. This
    36  // mode is entered by calling SeekPrefixGE and exited by any other absolute
    37  // positioning method (SeekGE, SeekLT, First, Last). When in prefix iteration
    38  // mode, a call to Next will advance to the next key which has the same
    39  // "prefix" as the one supplied to SeekPrefixGE. Note that "prefix" in this
    40  // context is not a strict byte prefix, but defined by byte equality for the
    41  // result of the Comparer.Split method. An InternalIterator is not required to
    42  // support prefix iteration mode, and can implement SeekPrefixGE by forwarding
    43  // to SeekGE.
    44  //
    45  // Bounds, [lower, upper), can be set on iterators, either using the SetBounds()
    46  // function in the interface, or in implementation specific ways during iterator
    47  // creation. The forward positioning routines (SeekGE, First, and Next) only
    48  // check the upper bound. The reverse positioning routines (SeekLT, Last, and
    49  // Prev) only check the lower bound. It is up to the caller to ensure that the
    50  // forward positioning routines respect the lower bound and the reverse
    51  // positioning routines respect the upper bound (i.e. calling SeekGE instead of
    52  // First if there is a lower bound, and SeekLT instead of Last if there is an
    53  // upper bound). This imposition is done in order to elevate that enforcement to
    54  // the caller (generally bitalostable.Iterator or bitalostable.mergingIter) rather than
    55  // having it duplicated in every InternalIterator implementation.
    56  //
    57  // Additionally, the caller needs to ensure that SeekGE/SeekPrefixGE are not
    58  // called with a key > the upper bound, and SeekLT is not called with a key <
    59  // the lower bound. InternalIterator implementations are required to respect
    60  // the iterator bounds, never returning records outside of the bounds with one
    61  // exception: an iterator may generate synthetic RANGEDEL marker records. See
    62  // levelIter.syntheticBoundary for the sole existing example of this behavior.
    63  // Specifically, levelIter can return synthetic keys whose user key is equal to
    64  // the lower/upper bound.
    65  //
    66  // The bounds provided to an internal iterator must remain valid until a
    67  // subsequent call to SetBounds has returned. This requirement exists so that
    68  // iterator implementations may compare old and new bounds to apply low-level
    69  // optimizations. The bitalostable.Iterator satisfies this requirement by maintaining
    70  // two bound buffers and switching between them.
    71  //
    72  // An iterator must be closed after use, but it is not necessary to read an
    73  // iterator until exhaustion.
    74  //
    75  // An iterator is not goroutine-safe, but it is safe to use multiple iterators
    76  // concurrently, either in separate goroutines or switching between the
    77  // iterators in a single goroutine.
    78  //
    79  // It is also safe to use an iterator concurrently with modifying its
    80  // underlying DB, if that DB permits modification. However, the resultant
    81  // key/value pairs are not guaranteed to be a consistent snapshot of that DB
    82  // at a particular point in time.
    83  //
    84  // InternalIterators accumulate errors encountered during operation, exposing
    85  // them through the Error method. All of the absolute positioning methods
    86  // reset any accumulated error before positioning. Relative positioning
    87  // methods return without advancing if the iterator has accumulated an error.
type InternalIterator interface {
	// SeekGE moves the iterator to the first key/value pair whose key is greater
	// than or equal to the given key. Returns the key and value if the iterator
	// is pointing at a valid entry, and (nil, nil) otherwise. Note that SeekGE
	// only checks the upper bound. It is up to the caller to ensure that key
	// is greater than or equal to the lower bound.
	SeekGE(key []byte, flags SeekGEFlags) (*InternalKey, []byte)

	// SeekPrefixGE moves the iterator to the first key/value pair whose key is
	// greater than or equal to the given key. Returns the key and value if the
	// iterator is pointing at a valid entry, and (nil, nil) otherwise. Note that
	// SeekPrefixGE only checks the upper bound. It is up to the caller to ensure
	// that key is greater than or equal to the lower bound.
	//
	// The prefix argument is used by some InternalIterator implementations (e.g.
	// sstable.Reader) to avoid expensive operations. A user-defined Split
	// function must be supplied to the Comparer for the DB. The supplied prefix
	// will be the prefix of the given key returned by that Split function. If
	// the iterator is able to determine that no key with the prefix exists, it
	// can return (nil,nil). Unlike SeekGE, this is not an indication that
	// iteration is exhausted.
	//
	// Note that the iterator may return keys not matching the prefix. It is up
	// to the caller to check if the prefix matches.
	//
	// Calling SeekPrefixGE places the receiver into prefix iteration mode. Once
	// in this mode, reverse iteration may not be supported and will return an
	// error. Note that bitalostable/Iterator.SeekPrefixGE has this same restriction on
	// not supporting reverse iteration in prefix iteration mode until a
	// different positioning routine (SeekGE, SeekLT, First or Last) switches the
	// iterator out of prefix iteration.
	SeekPrefixGE(prefix, key []byte, flags SeekGEFlags) (*InternalKey, []byte)

	// SeekLT moves the iterator to the last key/value pair whose key is less
	// than the given key. Returns the key and value if the iterator is pointing
	// at a valid entry, and (nil, nil) otherwise. Note that SeekLT only checks
	// the lower bound. It is up to the caller to ensure that key is less than
	// the upper bound.
	SeekLT(key []byte, flags SeekLTFlags) (*InternalKey, []byte)

	// First moves the iterator to the first key/value pair. Returns the key and
	// value if the iterator is pointing at a valid entry, and (nil, nil)
	// otherwise. Note that First only checks the upper bound. It is up to the
	// caller to ensure that First() is not called when there is a lower bound,
	// and instead call SeekGE(lower).
	First() (*InternalKey, []byte)

	// Last moves the iterator to the last key/value pair. Returns the key and
	// value if the iterator is pointing at a valid entry, and (nil, nil)
	// otherwise. Note that Last only checks the lower bound. It is up to the
	// caller to ensure that Last() is not called when there is an upper bound,
	// and instead call SeekLT(upper).
	Last() (*InternalKey, []byte)

	// Next moves the iterator to the next key/value pair. Returns the key and
	// value if the iterator is pointing at a valid entry, and (nil, nil)
	// otherwise. Note that Next only checks the upper bound. It is up to the
	// caller to ensure that key is greater than or equal to the lower bound.
	//
	// It is valid to call Next when the iterator is positioned before the first
	// key/value pair due to either a prior call to SeekLT or Prev which returned
	// (nil, nil). It is not allowed to call Next when the previous call to SeekGE,
	// SeekPrefixGE or Next returned (nil, nil).
	Next() (*InternalKey, []byte)

	// Prev moves the iterator to the previous key/value pair. Returns the key
	// and value if the iterator is pointing at a valid entry, and (nil, nil)
	// otherwise. Note that Prev only checks the lower bound. It is up to the
	// caller to ensure that key is less than the upper bound.
	//
	// It is valid to call Prev when the iterator is positioned after the last
	// key/value pair due to either a prior call to SeekGE or Next which returned
	// (nil, nil). It is not allowed to call Prev when the previous call to SeekLT
	// or Prev returned (nil, nil).
	Prev() (*InternalKey, []byte)

	// Error returns any accumulated error.
	Error() error

	// Close closes the iterator and returns any accumulated error. Exhausting
	// all the key/value pairs in a table is not considered to be an error.
	// It is valid to call Close multiple times. Other methods should not be
	// called after the iterator has been closed.
	Close() error

	// SetBounds sets the lower and upper bounds for the iterator. Note that the
	// result of Next and Prev will be undefined until the iterator has been
	// repositioned with SeekGE, SeekPrefixGE, SeekLT, First, or Last.
	//
	// The bounds provided must remain valid until a subsequent call to
	// SetBounds has returned. This requirement exists so that iterator
	// implementations may compare old and new bounds to apply low-level
	// optimizations.
	SetBounds(lower, upper []byte)

	// Embedding fmt.Stringer requires every implementation to also provide a
	// String() string method.
	fmt.Stringer
}
   185  
   186  // SeekGEFlags holds flags that may configure the behavior of a forward seek.
   187  // Not all flags are relevant to all iterators.
   188  type SeekGEFlags uint8
   189  
   190  const (
   191  	seekGEFlagTrySeekUsingNext uint8 = iota
   192  	seekGEFlagRelativeSeek
   193  )
   194  
   195  // SeekGEFlagsNone is the default value of SeekGEFlags, with all flags disabled.
   196  const SeekGEFlagsNone = SeekGEFlags(0)
   197  
   198  // TrySeekUsingNext indicates whether a performance optimization was enabled
   199  // by a caller, indicating the caller has not done any action to move this
   200  // iterator beyond the first key that would be found if this iterator were to
   201  // honestly do the intended seek. For example, say the caller did a
   202  // SeekGE(k1...), followed by SeekGE(k2...) where k1 <= k2, without any
   203  // intermediate positioning calls. The caller can safely specify true for this
   204  // parameter in the second call. As another example, say the caller did do one
   205  // call to Next between the two Seek calls, and k1 < k2. Again, the caller can
   206  // safely specify a true value for this parameter. Note that a false value is
   207  // always safe. The callee is free to ignore the true value if its
   208  // implementation does not permit this optimization.
   209  //
   210  // We make the caller do this determination since a string comparison of k1, k2
   211  // is not necessarily cheap, and there may be many iterators in the iterator
   212  // stack. Doing it once at the root of the iterator stack is cheaper.
   213  //
   214  // This optimization could also be applied to SeekLT (where it would be
   215  // trySeekUsingPrev). We currently only do it for SeekPrefixGE and SeekGE
   216  // because this is where this optimization helps the performance of CockroachDB.
   217  // The SeekLT cases in CockroachDB are typically accompanied with bounds that
   218  // change between seek calls, and is optimized inside certain iterator
   219  // implementations, like singleLevelIterator, without any extra parameter
   220  // passing (though the same amortization of string comparisons could be done to
   221  // improve that optimization, by making the root of the iterator stack do it).
   222  func (s SeekGEFlags) TrySeekUsingNext() bool { return (s & (1 << seekGEFlagTrySeekUsingNext)) != 0 }
   223  
   224  // RelativeSeek is set when in the course of a forward positioning operation, a
   225  // higher-level iterator seeks a lower-level iterator to a larger key than the
   226  // one at the current iterator position.
   227  //
   228  // Concretely, this occurs when the merging iterator observes a range deletion
   229  // covering the key at a level's current position, and the merging iterator
   230  // seeks the level to the range deletion's end key. During lazy-combined
   231  // iteration, this flag signals to the level iterator that the seek is NOT an
   232  // absolute-positioning operation from the perspective of the bitalostable.Iterator,
   233  // and the level iterator must look for range keys in tables between the current
   234  // iterator position and the new seeked position.
   235  func (s SeekGEFlags) RelativeSeek() bool { return (s & (1 << seekGEFlagRelativeSeek)) != 0 }
   236  
   237  // EnableTrySeekUsingNext returns the provided flags with the
   238  // try-seek-using-next optimization enabled. See TrySeekUsingNext for an
   239  // explanation of this optimization.
   240  func (s SeekGEFlags) EnableTrySeekUsingNext() SeekGEFlags {
   241  	return s | (1 << seekGEFlagTrySeekUsingNext)
   242  }
   243  
   244  // DisableTrySeekUsingNext returns the provided flags with the
   245  // try-seek-using-next optimization disabled.
   246  func (s SeekGEFlags) DisableTrySeekUsingNext() SeekGEFlags {
   247  	return s &^ (1 << seekGEFlagTrySeekUsingNext)
   248  }
   249  
   250  // EnableRelativeSeek returns the provided flags with the relative-seek flag
   251  // enabled. See RelativeSeek for an explanation of this flag's use.
   252  func (s SeekGEFlags) EnableRelativeSeek() SeekGEFlags {
   253  	return s | (1 << seekGEFlagRelativeSeek)
   254  }
   255  
   256  // DisableRelativeSeek returns the provided flags with the relative-seek flag
   257  // disabled.
   258  func (s SeekGEFlags) DisableRelativeSeek() SeekGEFlags {
   259  	return s &^ (1 << seekGEFlagRelativeSeek)
   260  }
   261  
   262  // SeekLTFlags holds flags that may configure the behavior of a reverse seek.
   263  // Not all flags are relevant to all iterators.
   264  type SeekLTFlags uint8
   265  
   266  const (
   267  	seekLTFlagRelativeSeek uint8 = iota
   268  )
   269  
   270  // SeekLTFlagsNone is the default value of SeekLTFlags, with all flags disabled.
   271  const SeekLTFlagsNone = SeekLTFlags(0)
   272  
   273  // RelativeSeek is set when in the course of a reverse positioning operation, a
   274  // higher-level iterator seeks a lower-level iterator to a smaller key than the
   275  // one at the current iterator position.
   276  //
   277  // Concretely, this occurs when the merging iterator observes a range deletion
   278  // covering the key at a level's current position, and the merging iterator
   279  // seeks the level to the range deletion's start key. During lazy-combined
   280  // iteration, this flag signals to the level iterator that the seek is NOT an
   281  // absolute-positioning operation from the perspective of the bitalostable.Iterator,
   282  // and the level iterator must look for range keys in tables between the current
   283  // iterator position and the new seeked position.
   284  func (s SeekLTFlags) RelativeSeek() bool { return s&(1<<seekLTFlagRelativeSeek) != 0 }
   285  
   286  // EnableRelativeSeek returns the provided flags with the relative-seek flag
   287  // enabled. See RelativeSeek for an explanation of this flag's use.
   288  func (s SeekLTFlags) EnableRelativeSeek() SeekLTFlags {
   289  	return s | (1 << seekLTFlagRelativeSeek)
   290  }
   291  
   292  // DisableRelativeSeek returns the provided flags with the relative-seek flag
   293  // disabled.
   294  func (s SeekLTFlags) DisableRelativeSeek() SeekLTFlags {
   295  	return s &^ (1 << seekLTFlagRelativeSeek)
   296  }
   297  
   298  // InternalIteratorStats contains miscellaneous stats produced by
   299  // InternalIterators that are part of the InternalIterator tree. Not every
   300  // field is relevant for an InternalIterator implementation. The field values
   301  // are aggregated as one goes up the InternalIterator tree.
   302  type InternalIteratorStats struct {
   303  	// Bytes in the loaded blocks. If the block was compressed, this is the
   304  	// compressed bytes. Currently, only the second-level index and data blocks
   305  	// containing points are included.
   306  	BlockBytes uint64
   307  	// Subset of BlockBytes that were in the block cache.
   308  	BlockBytesInCache uint64
   309  
   310  	// The following can repeatedly count the same points if they are iterated
   311  	// over multiple times. Additionally, they may count a point twice when
   312  	// switching directions. The latter could be improved if needed.
   313  
   314  	// Bytes in keys that were iterated over. Currently, only point keys are
   315  	// included.
   316  	KeyBytes uint64
   317  	// Bytes in values that were iterated over. Currently, only point values are
   318  	// included.
   319  	ValueBytes uint64
   320  	// The count of points iterated over.
   321  	PointCount uint64
   322  	// Points that were iterated over that were covered by range tombstones. It
   323  	// can be useful for discovering instances of
   324  	// https://github.com/zuoyebang/bitalostable/issues/1070.
   325  	PointsCoveredByRangeTombstones uint64
   326  }
   327  
   328  // Merge merges the stats in from into the given stats.
   329  func (s *InternalIteratorStats) Merge(from InternalIteratorStats) {
   330  	s.BlockBytes += from.BlockBytes
   331  	s.BlockBytesInCache += from.BlockBytesInCache
   332  	s.KeyBytes += from.KeyBytes
   333  	s.ValueBytes += from.ValueBytes
   334  	s.PointCount += from.PointCount
   335  	s.PointsCoveredByRangeTombstones += from.PointsCoveredByRangeTombstones
   336  }