github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/level_iter.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"fmt"
     9  
    10  	"github.com/cockroachdb/pebble/internal/base"
    11  	"github.com/cockroachdb/pebble/internal/invariants"
    12  	"github.com/cockroachdb/pebble/internal/manifest"
    13  )
    14  
// LevelIter provides a merged view of spans from sstables in a level.
// It takes advantage of level invariants to only have one sstable span block
// open at one time, opened using the newIter function passed in.
type LevelIter struct {
	// cmp orders user keys; it is used both to seek among the level's files
	// and to compare file boundary keys.
	cmp base.Compare
	// Denotes the kind of key the level iterator should read. If the key type
	// is KeyTypePoint, the level iterator will read range tombstones (which
	// only affect point keys). If the key type is KeyTypeRange, the level
	// iterator will read range keys. It is invalid to configure an iterator
	// with the KeyTypePointAndRange key type (Init panics on any key type
	// other than KeyTypePoint and KeyTypeRange).
	//
	// If key type is KeyTypePoint, no straddle spans are emitted between files,
	// and point key bounds are used to find files instead of range key bounds.
	//
	// TODO(bilal): Straddle spans can safely be produced in rangedel mode once
	// we can guarantee that we will never read sstables in a level that split
	// user keys across them. This might be guaranteed in a future release, but
	// as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
	// keyType == KeyTypePoint
	keyType manifest.KeyType
	// The LSM level this LevelIter is initialized for. Used in logging.
	level manifest.Level
	// The below fields are used to fill in gaps between adjacent files' range
	// key spaces. This is an optimization to avoid unnecessarily loading files
	// in cases where range keys are sparse and rare.
	//
	// dir is set by every positioning operation (+1 for forward, -1 for
	// backward). straddleDir is non-zero only while the span most recently
	// returned was a synthesized straddle span held in straddle; in that case
	// it records the direction (+1 or -1) in which the straddle span was
	// produced. Every absolute positioning operation resets straddle and
	// straddleDir to their zero values before doing anything else.
	//
	// Note that when a straddle span is initialized, iterFile is modified to
	// point to the next file in the straddleDir direction. A change of direction
	// on a straddle key therefore necessitates the value of iterFile to be
	// reverted.
	dir         int
	straddle    Span
	straddleDir int
	// The iter for the current file (iterFile). It is nil under any of the
	// following conditions:
	// - files.Current() == nil
	// - err != nil
	// - straddleDir != 0, in which case iterFile is not nil and points to the
	//   next file (in the straddleDir direction).
	// - some other constraint, like the bounds in opts, caused the file at index to not
	//   be relevant to the iteration.
	iter FragmentIterator
	// iterFile holds the current file.
	// INVARIANT: iterFile = files.Current()
	// (verify asserts this invariant when invariants.Enabled is true.)
	iterFile *manifest.FileMetadata
	newIter  TableNewSpanIter       // called by loadFile to open a span iterator for a file
	files    manifest.LevelIterator // the level's files, filtered by keyType in Init
	err      error                  // last recorded error; cleared by SeekGE, SeekLT, First and Last

	// The options that were passed in. They are forwarded unchanged to newIter
	// each time a file is opened.
	tableOpts SpanIterOptions

	// TODO(bilal): Add InternalIteratorStats.
}
    72  
// Compile-time assertion that *LevelIter implements the
// keyspan.FragmentIterator interface.
var _ FragmentIterator = (*LevelIter)(nil)
    75  
    76  // NewLevelIter returns a LevelIter.
    77  func NewLevelIter(
    78  	opts SpanIterOptions,
    79  	cmp base.Compare,
    80  	newIter TableNewSpanIter,
    81  	files manifest.LevelIterator,
    82  	level manifest.Level,
    83  	keyType manifest.KeyType,
    84  ) *LevelIter {
    85  	l := &LevelIter{}
    86  	l.Init(opts, cmp, newIter, files, level, keyType)
    87  	return l
    88  }
    89  
    90  // Init initializes a LevelIter.
    91  func (l *LevelIter) Init(
    92  	opts SpanIterOptions,
    93  	cmp base.Compare,
    94  	newIter TableNewSpanIter,
    95  	files manifest.LevelIterator,
    96  	level manifest.Level,
    97  	keyType manifest.KeyType,
    98  ) {
    99  	l.err = nil
   100  	l.level = level
   101  	l.tableOpts = opts
   102  	l.cmp = cmp
   103  	l.iterFile = nil
   104  	l.newIter = newIter
   105  	switch keyType {
   106  	case manifest.KeyTypePoint:
   107  		l.keyType = keyType
   108  		l.files = files.Filter(keyType)
   109  	case manifest.KeyTypeRange:
   110  		l.keyType = keyType
   111  		l.files = files.Filter(keyType)
   112  	default:
   113  		panic(fmt.Sprintf("unsupported key type: %v", keyType))
   114  	}
   115  }
   116  
   117  func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata {
   118  	// Find the earliest file whose largest key is >= key.
   119  	//
   120  	// If the earliest file has its largest key == key and that largest key is a
   121  	// range deletion sentinel, we know that we manufactured this sentinel to convert
   122  	// the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum
   123  	// is the form of a range deletion sentinel which can contribute a largest key = end#sentinel).
   124  	// In this case we don't return this as the earliest file since there is nothing actually
   125  	// equal to key in it.
   126  
   127  	m := l.files.SeekGE(l.cmp, key)
   128  	for m != nil {
   129  		largestKey := m.LargestRangeKey
   130  		if l.keyType == manifest.KeyTypePoint {
   131  			largestKey = m.LargestPointKey
   132  		}
   133  		if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 {
   134  			break
   135  		}
   136  		m = l.files.Next()
   137  	}
   138  	return m
   139  }
   140  
   141  func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata {
   142  	// Find the last file whose smallest key is < key.
   143  	return l.files.SeekLT(l.cmp, key)
   144  }
   145  
// loadFileReturnIndicator is the result of LevelIter.loadFile; it tells the
// caller whether l.iter is usable and whether it was freshly opened.
type loadFileReturnIndicator int8

const (
	// noFileLoaded means there is no usable iterator: the requested file was
	// nil or an error occurred.
	noFileLoaded loadFileReturnIndicator = iota
	// fileAlreadyLoaded means the requested file was already the current file
	// and its iterator was reused.
	fileAlreadyLoaded
	// newFileLoaded means a new iterator was opened for the requested file.
	newFileLoaded
)
   153  
   154  func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator {
   155  	indicator := noFileLoaded
   156  	if l.iterFile == file {
   157  		if l.err != nil {
   158  			return noFileLoaded
   159  		}
   160  		if l.iter != nil {
   161  			// We are already at the file, but we would need to check for bounds.
   162  			// Set indicator accordingly.
   163  			indicator = fileAlreadyLoaded
   164  		}
   165  		// We were already at file, but don't have an iterator, probably because the file was
   166  		// beyond the iteration bounds. It may still be, but it is also possible that the bounds
   167  		// have changed. We handle that below.
   168  	}
   169  
   170  	// Note that LevelIter.Close() can be called multiple times.
   171  	if indicator != fileAlreadyLoaded {
   172  		if err := l.Close(); err != nil {
   173  			return noFileLoaded
   174  		}
   175  	}
   176  
   177  	l.iterFile = file
   178  	if file == nil {
   179  		return noFileLoaded
   180  	}
   181  	if indicator != fileAlreadyLoaded {
   182  		l.iter, l.err = l.newIter(file, l.tableOpts)
   183  		indicator = newFileLoaded
   184  	}
   185  	if l.err != nil {
   186  		return noFileLoaded
   187  	}
   188  	return indicator
   189  }
   190  
   191  // SeekGE implements keyspan.FragmentIterator.
   192  func (l *LevelIter) SeekGE(key []byte) *Span {
   193  	l.dir = +1
   194  	l.straddle = Span{}
   195  	l.straddleDir = 0
   196  	l.err = nil // clear cached iteration error
   197  
   198  	f := l.findFileGE(key)
   199  	if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 {
   200  		// Peek at the previous file.
   201  		prevFile := l.files.Prev()
   202  		l.files.Next()
   203  		if prevFile != nil {
   204  			// We could unconditionally return an empty span between the seek key and
   205  			// f.SmallestRangeKey, however if this span is to the left of all range
   206  			// keys on this level, it could lead to inconsistent behaviour in relative
   207  			// positioning operations. Consider this example, with a b-c range key:
   208  			//
   209  			// SeekGE(a) -> a-b:{}
   210  			// Next() -> b-c{(#5,RANGEKEYSET,@4,foo)}
   211  			// Prev() -> nil
   212  			//
   213  			// Iterators higher up in the iterator stack rely on this sort of relative
   214  			// positioning consistency.
   215  			//
   216  			// TODO(bilal): Investigate ways to be able to return straddle spans in
   217  			// cases similar to the above, while still retaining correctness.
   218  			// Return a straddling key instead of loading the file.
   219  			l.iterFile = f
   220  			if err := l.Close(); err != nil {
   221  				return l.verify(nil)
   222  			}
   223  			l.straddleDir = +1
   224  			l.straddle = Span{
   225  				Start: prevFile.LargestRangeKey.UserKey,
   226  				End:   f.SmallestRangeKey.UserKey,
   227  				Keys:  nil,
   228  			}
   229  			return l.verify(&l.straddle)
   230  		}
   231  	}
   232  	loadFileIndicator := l.loadFile(f, +1)
   233  	if loadFileIndicator == noFileLoaded {
   234  		return l.verify(nil)
   235  	}
   236  	if span := l.iter.SeekGE(key); span != nil {
   237  		return l.verify(span)
   238  	}
   239  	return l.skipEmptyFileForward()
   240  }
   241  
   242  // SeekLT implements keyspan.FragmentIterator.
   243  func (l *LevelIter) SeekLT(key []byte) *Span {
   244  	l.dir = -1
   245  	l.straddle = Span{}
   246  	l.straddleDir = 0
   247  	l.err = nil // clear cached iteration error
   248  
   249  	f := l.findFileLT(key)
   250  	if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 {
   251  		// Peek at the next file.
   252  		nextFile := l.files.Next()
   253  		l.files.Prev()
   254  		if nextFile != nil {
   255  			// We could unconditionally return an empty span between f.LargestRangeKey
   256  			// and the seek key, however if this span is to the right of all range keys
   257  			// on this level, it could lead to inconsistent behaviour in relative
   258  			// positioning operations. Consider this example, with a b-c range key:
   259  			//
   260  			// SeekLT(d) -> c-d:{}
   261  			// Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)}
   262  			// Next() -> nil
   263  			//
   264  			// Iterators higher up in the iterator stack rely on this sort of relative
   265  			// positioning consistency.
   266  			//
   267  			// TODO(bilal): Investigate ways to be able to return straddle spans in
   268  			// cases similar to the above, while still retaining correctness.
   269  			// Return a straddling key instead of loading the file.
   270  			l.iterFile = f
   271  			if err := l.Close(); err != nil {
   272  				return l.verify(nil)
   273  			}
   274  			l.straddleDir = -1
   275  			l.straddle = Span{
   276  				Start: f.LargestRangeKey.UserKey,
   277  				End:   nextFile.SmallestRangeKey.UserKey,
   278  				Keys:  nil,
   279  			}
   280  			return l.verify(&l.straddle)
   281  		}
   282  	}
   283  	if l.loadFile(f, -1) == noFileLoaded {
   284  		return l.verify(nil)
   285  	}
   286  	if span := l.iter.SeekLT(key); span != nil {
   287  		return l.verify(span)
   288  	}
   289  	return l.skipEmptyFileBackward()
   290  }
   291  
   292  // First implements keyspan.FragmentIterator.
   293  func (l *LevelIter) First() *Span {
   294  	l.dir = +1
   295  	l.straddle = Span{}
   296  	l.straddleDir = 0
   297  	l.err = nil // clear cached iteration error
   298  
   299  	if l.loadFile(l.files.First(), +1) == noFileLoaded {
   300  		return l.verify(nil)
   301  	}
   302  	if span := l.iter.First(); span != nil {
   303  		return l.verify(span)
   304  	}
   305  	return l.skipEmptyFileForward()
   306  }
   307  
   308  // Last implements keyspan.FragmentIterator.
   309  func (l *LevelIter) Last() *Span {
   310  	l.dir = -1
   311  	l.straddle = Span{}
   312  	l.straddleDir = 0
   313  	l.err = nil // clear cached iteration error
   314  
   315  	if l.loadFile(l.files.Last(), -1) == noFileLoaded {
   316  		return l.verify(nil)
   317  	}
   318  	if span := l.iter.Last(); span != nil {
   319  		return l.verify(span)
   320  	}
   321  	return l.skipEmptyFileBackward()
   322  }
   323  
   324  // Next implements keyspan.FragmentIterator.
   325  func (l *LevelIter) Next() *Span {
   326  	if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
   327  		return l.verify(nil)
   328  	}
   329  	if l.iter == nil && l.iterFile == nil {
   330  		// l.dir <= 0
   331  		return l.First()
   332  	}
   333  	l.dir = +1
   334  
   335  	if l.iter != nil {
   336  		if span := l.iter.Next(); span != nil {
   337  			return l.verify(span)
   338  		}
   339  	}
   340  	return l.skipEmptyFileForward()
   341  }
   342  
   343  // Prev implements keyspan.FragmentIterator.
   344  func (l *LevelIter) Prev() *Span {
   345  	if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
   346  		return l.verify(nil)
   347  	}
   348  	if l.iter == nil && l.iterFile == nil {
   349  		// l.dir >= 0
   350  		return l.Last()
   351  	}
   352  	l.dir = -1
   353  
   354  	if l.iter != nil {
   355  		if span := l.iter.Prev(); span != nil {
   356  			return l.verify(span)
   357  		}
   358  	}
   359  	return l.skipEmptyFileBackward()
   360  }
   361  
   362  func (l *LevelIter) skipEmptyFileForward() *Span {
   363  	if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
   364  		l.iterFile != nil && l.iter != nil {
   365  		// We were at a file that had spans. Check if the next file that has
   366  		// spans is not directly adjacent to the current file i.e. there is a
   367  		// gap in the span keyspace between the two files. In that case, synthesize
   368  		// a "straddle span" in l.straddle and return that.
   369  		//
   370  		// Straddle spans are not created in rangedel mode.
   371  		if err := l.Close(); err != nil {
   372  			l.err = err
   373  			return l.verify(nil)
   374  		}
   375  		startKey := l.iterFile.LargestRangeKey.UserKey
   376  		// Resetting l.iterFile without loading the file into l.iter is okay and
   377  		// does not change the logic in loadFile() as long as l.iter is also nil;
   378  		// which it should be due to the Close() call above.
   379  		l.iterFile = l.files.Next()
   380  		if l.iterFile == nil {
   381  			return l.verify(nil)
   382  		}
   383  		endKey := l.iterFile.SmallestRangeKey.UserKey
   384  		if l.cmp(startKey, endKey) < 0 {
   385  			// There is a gap between the two files. Synthesize a straddling span
   386  			// to avoid unnecessarily loading the next file.
   387  			l.straddle = Span{
   388  				Start: startKey,
   389  				End:   endKey,
   390  			}
   391  			l.straddleDir = +1
   392  			return l.verify(&l.straddle)
   393  		}
   394  	} else if l.straddleDir < 0 {
   395  		// We were at a straddle key, but are now changing directions. l.iterFile
   396  		// was already moved backward by skipEmptyFileBackward, so advance it
   397  		// forward.
   398  		l.iterFile = l.files.Next()
   399  	}
   400  	l.straddle = Span{}
   401  	l.straddleDir = 0
   402  	var span *Span
   403  	for span.Empty() {
   404  		fileToLoad := l.iterFile
   405  		if l.keyType == manifest.KeyTypePoint {
   406  			// We haven't iterated to the next file yet if we're in point key
   407  			// (rangedel) mode.
   408  			fileToLoad = l.files.Next()
   409  		}
   410  		if l.loadFile(fileToLoad, +1) == noFileLoaded {
   411  			return l.verify(nil)
   412  		}
   413  		span = l.iter.First()
   414  		// In rangedel mode, we can expect to get empty files that we'd need to
   415  		// skip over, but not in range key mode.
   416  		if l.keyType == manifest.KeyTypeRange {
   417  			break
   418  		}
   419  	}
   420  	return l.verify(span)
   421  }
   422  
   423  func (l *LevelIter) skipEmptyFileBackward() *Span {
   424  	// We were at a file that had spans. Check if the previous file that has
   425  	// spans is not directly adjacent to the current file i.e. there is a
   426  	// gap in the span keyspace between the two files. In that case, synthesize
   427  	// a "straddle span" in l.straddle and return that.
   428  	//
   429  	// Straddle spans are not created in rangedel mode.
   430  	if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
   431  		l.iterFile != nil && l.iter != nil {
   432  		if err := l.Close(); err != nil {
   433  			l.err = err
   434  			return l.verify(nil)
   435  		}
   436  		endKey := l.iterFile.SmallestRangeKey.UserKey
   437  		// Resetting l.iterFile without loading the file into l.iter is okay and
   438  		// does not change the logic in loadFile() as long as l.iter is also nil;
   439  		// which it should be due to the Close() call above.
   440  		l.iterFile = l.files.Prev()
   441  		if l.iterFile == nil {
   442  			return l.verify(nil)
   443  		}
   444  		startKey := l.iterFile.LargestRangeKey.UserKey
   445  		if l.cmp(startKey, endKey) < 0 {
   446  			// There is a gap between the two files. Synthesize a straddling span
   447  			// to avoid unnecessarily loading the next file.
   448  			l.straddle = Span{
   449  				Start: startKey,
   450  				End:   endKey,
   451  			}
   452  			l.straddleDir = -1
   453  			return l.verify(&l.straddle)
   454  		}
   455  	} else if l.straddleDir > 0 {
   456  		// We were at a straddle key, but are now changing directions. l.iterFile
   457  		// was already advanced forward by skipEmptyFileForward, so move it
   458  		// backward.
   459  		l.iterFile = l.files.Prev()
   460  	}
   461  	l.straddle = Span{}
   462  	l.straddleDir = 0
   463  	var span *Span
   464  	for span.Empty() {
   465  		fileToLoad := l.iterFile
   466  		if l.keyType == manifest.KeyTypePoint {
   467  			fileToLoad = l.files.Prev()
   468  		}
   469  		if l.loadFile(fileToLoad, -1) == noFileLoaded {
   470  			return l.verify(nil)
   471  		}
   472  		span = l.iter.Last()
   473  		// In rangedel mode, we can expect to get empty files that we'd need to
   474  		// skip over, but not in range key mode as the filter on the FileMetadata
   475  		// should guarantee we always get a non-empty file.
   476  		if l.keyType == manifest.KeyTypeRange {
   477  			break
   478  		}
   479  	}
   480  	return l.verify(span)
   481  }
   482  
   483  // verify is invoked whenever a span is returned from an iterator positioning
   484  // method to a caller. During invariant builds, it asserts invariants to the
   485  // caller.
   486  func (l *LevelIter) verify(s *Span) *Span {
   487  	// NB: Do not add any logic outside the invariants.Enabled conditional to
   488  	// ensure that verify is always compiled away in production builds.
   489  	if invariants.Enabled {
   490  		if f := l.files.Current(); f != l.iterFile {
   491  			panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
   492  				f, l.iterFile))
   493  		}
   494  	}
   495  	return s
   496  }
   497  
   498  // Error implements keyspan.FragmentIterator.
   499  func (l *LevelIter) Error() error {
   500  	if l.err != nil || l.iter == nil {
   501  		return l.err
   502  	}
   503  	return l.iter.Error()
   504  }
   505  
   506  // Close implements keyspan.FragmentIterator.
   507  func (l *LevelIter) Close() error {
   508  	if l.iter != nil {
   509  		l.err = l.iter.Close()
   510  		l.iter = nil
   511  	}
   512  	return l.err
   513  }
   514  
   515  // String implements keyspan.FragmentIterator.
   516  func (l *LevelIter) String() string {
   517  	if l.iterFile != nil {
   518  		return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum)
   519  	}
   520  	return fmt.Sprintf("%s: fileNum=<nil>", l.level)
   521  }