github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/defragment.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  
    10  	"github.com/cockroachdb/pebble/internal/base"
    11  	"github.com/cockroachdb/pebble/internal/bytealloc"
    12  	"github.com/cockroachdb/pebble/internal/invariants"
    13  )
    14  
    15  // bufferReuseMaxCapacity is the maximum capacity of a DefragmentingIter buffer
    16  // that DefragmentingIter will reuse. Buffers larger than this will be
    17  // discarded and reallocated as necessary.
    18  const bufferReuseMaxCapacity = 10 << 10 // 10 KB
    19  
    20  // keysReuseMaxCapacity is the maximum capacity of a []keyspan.Key buffer that
    21  // DefragmentingIter will reuse. Buffers larger than this will be discarded and
    22  // reallocated as necessary.
    23  const keysReuseMaxCapacity = 100
    24  
    25  // DefragmentMethod configures the defragmentation performed by the
    26  // DefragmentingIter.
    27  type DefragmentMethod interface {
    28  	// ShouldDefragment takes two abutting spans and returns whether the two
    29  	// spans should be combined into a single, defragmented Span.
    30  	ShouldDefragment(equal base.Equal, left, right *Span) bool
    31  }
    32  
    33  // The DefragmentMethodFunc type is an adapter to allow the use of ordinary
    34  // functions as DefragmentMethods. If f is a function with the appropriate
    35  // signature, DefragmentMethodFunc(f) is a DefragmentMethod that calls f.
    36  type DefragmentMethodFunc func(equal base.Equal, left, right *Span) bool
    37  
    38  // ShouldDefragment calls f(equal, left, right).
    39  func (f DefragmentMethodFunc) ShouldDefragment(equal base.Equal, left, right *Span) bool {
    40  	return f(equal, left, right)
    41  }
    42  
    43  // DefragmentInternal configures a DefragmentingIter to defragment spans
    44  // only if they have identical keys. It requires spans' keys to be sorted in
    45  // trailer descending order.
    46  //
    47  // This defragmenting method is intended for use in compactions that may see
    48  // internal range keys fragments that may now be joined, because the state that
    49  // required their fragmentation has been dropped.
    50  var DefragmentInternal DefragmentMethod = DefragmentMethodFunc(func(equal base.Equal, a, b *Span) bool {
    51  	if a.KeysOrder != ByTrailerDesc || b.KeysOrder != ByTrailerDesc {
    52  		panic("pebble: span keys unexpectedly not in trailer descending order")
    53  	}
    54  	if len(a.Keys) != len(b.Keys) {
    55  		return false
    56  	}
    57  	for i := range a.Keys {
    58  		if a.Keys[i].Trailer != b.Keys[i].Trailer {
    59  			return false
    60  		}
    61  		if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) {
    62  			return false
    63  		}
    64  		if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) {
    65  			return false
    66  		}
    67  	}
    68  	return true
    69  })
    70  
    71  // DefragmentReducer merges the current and next Key slices, returning a new Key
    72  // slice.
    73  //
    74  // Implementations should modify and return `cur` to save on allocations, or
    75  // consider allocating a new slice, as the `cur` slice may be retained by the
    76  // DefragmentingIter and mutated. The `next` slice must not be mutated.
    77  //
    78  // The incoming slices are sorted by (SeqNum, Kind) descending. The output slice
    79  // must also have this sort order.
    80  type DefragmentReducer func(cur, next []Key) []Key
    81  
    82  // StaticDefragmentReducer is a no-op DefragmentReducer that simply returns the
    83  // current key slice, effectively retaining the first set of keys encountered
    84  // for a defragmented span.
    85  //
    86  // This reducer can be used, for example, when the set of Keys for each Span
    87  // being reduced is not expected to change, and therefore the keys from the
    88  // first span encountered can be used without considering keys in subsequent
    89  // spans.
    90  var StaticDefragmentReducer DefragmentReducer = func(cur, _ []Key) []Key {
    91  	return cur
    92  }
    93  
    94  // iterPos is an enum indicating the position of the defragmenting iter's
    95  // wrapped iter. The defragmenting iter must look ahead or behind when
    96  // defragmenting forward or backwards respectively, and this enum records that
    97  // current position.
    98  type iterPos int8
    99  
   100  const (
   101  	iterPosPrev iterPos = -1
   102  	iterPosCurr iterPos = 0
   103  	iterPosNext iterPos = +1
   104  )
   105  
   106  // DefragmentingIter wraps a key span iterator, defragmenting physical
   107  // fragmentation during iteration.
   108  //
   109  // During flushes and compactions, keys applied over a span may be split at
   110  // sstable boundaries. This fragmentation can produce internal key bounds that
   111  // do not match any of the bounds ever supplied to a user operation. This
   112  // physical fragmentation is necessary to avoid excessively wide sstables.
   113  //
   114  // The defragmenting iterator undoes this physical fragmentation, joining spans
   115  // with abutting bounds and equal state. The defragmenting iterator takes a
   116  // DefragmentMethod to determine what is "equal state" for a span. The
   117  // DefragmentMethod is a function type, allowing arbitrary comparisons between
   118  // Span keys.
   119  //
   120  // Seeking (SeekGE, SeekLT) poses an obstacle to defragmentation. A seek may
   121  // land on a physical fragment in the middle of several fragments that must be
   122  // defragmented. A seek that lands in a fragment straddling the seek key must
   123  // first degfragment in the opposite direction of iteration to find the
   124  // beginning of the defragmented span, and then defragments in the iteration
   125  // direction, ensuring it's found a whole defragmented span.
   126  type DefragmentingIter struct {
   127  	// DefragmentingBuffers holds buffers used for copying iterator state.
   128  	*DefragmentingBuffers
   129  	comparer *base.Comparer
   130  	equal    base.Equal
   131  	iter     FragmentIterator
   132  	iterSpan *Span
   133  	iterPos  iterPos
   134  
   135  	// curr holds the span at the current iterator position.
   136  	curr Span
   137  
   138  	// method is a comparison function for two spans. method is called when two
   139  	// spans are abutting to determine whether they may be defragmented.
   140  	// method does not itself check for adjacency for the two spans.
   141  	method DefragmentMethod
   142  
   143  	// reduce is the reducer function used to collect Keys across all spans that
   144  	// constitute a defragmented span.
   145  	reduce DefragmentReducer
   146  }
   147  
   148  // DefragmentingBuffers holds buffers used for copying iterator state.
   149  type DefragmentingBuffers struct {
   150  	// currBuf is a buffer for use when copying user keys for curr. currBuf is
   151  	// cleared between positioning methods.
   152  	currBuf bytealloc.A
   153  	// keysBuf is a buffer for use when copying Keys for DefragmentingIter.curr.
   154  	keysBuf []Key
   155  	// keyBuf is a buffer specifically for the defragmented start key when
   156  	// defragmenting backwards or the defragmented end key when defragmenting
   157  	// forwards. These bounds are overwritten repeatedly during defragmentation,
   158  	// and the defragmentation routines overwrite keyBuf repeatedly to store
   159  	// these extended bounds.
   160  	keyBuf []byte
   161  }
   162  
   163  // PrepareForReuse discards any excessively large buffers.
   164  func (bufs *DefragmentingBuffers) PrepareForReuse() {
   165  	if cap(bufs.currBuf) > bufferReuseMaxCapacity {
   166  		bufs.currBuf = nil
   167  	}
   168  	if cap(bufs.keyBuf) > bufferReuseMaxCapacity {
   169  		bufs.keyBuf = nil
   170  	}
   171  	if cap(bufs.keysBuf) > keysReuseMaxCapacity {
   172  		bufs.keysBuf = nil
   173  	}
   174  }
   175  
// Compile-time assertion that *DefragmentingIter implements the
// FragmentIterator interface.
var _ FragmentIterator = (*DefragmentingIter)(nil)
   178  
   179  // Init initializes the defragmenting iter using the provided defragment
   180  // method.
   181  func (i *DefragmentingIter) Init(
   182  	comparer *base.Comparer,
   183  	iter FragmentIterator,
   184  	equal DefragmentMethod,
   185  	reducer DefragmentReducer,
   186  	bufs *DefragmentingBuffers,
   187  ) {
   188  	*i = DefragmentingIter{
   189  		DefragmentingBuffers: bufs,
   190  		comparer:             comparer,
   191  		equal:                comparer.Equal,
   192  		iter:                 iter,
   193  		method:               equal,
   194  		reduce:               reducer,
   195  	}
   196  }
   197  
   198  // Error returns any accumulated error.
   199  func (i *DefragmentingIter) Error() error {
   200  	return i.iter.Error()
   201  }
   202  
   203  // Close closes the underlying iterators.
   204  func (i *DefragmentingIter) Close() error {
   205  	return i.iter.Close()
   206  }
   207  
   208  // SeekGE moves the iterator to the first span covering a key greater than or
   209  // equal to the given key. This is equivalent to seeking to the first span with
   210  // an end key greater than the given key.
   211  func (i *DefragmentingIter) SeekGE(key []byte) *Span {
   212  	i.iterSpan = i.iter.SeekGE(key)
   213  	if i.iterSpan == nil {
   214  		i.iterPos = iterPosCurr
   215  		return nil
   216  	} else if i.iterSpan.Empty() {
   217  		i.iterPos = iterPosCurr
   218  		return i.iterSpan
   219  	}
   220  	// If the span starts strictly after key, we know there mustn't be an
   221  	// earlier span that ends at i.iterSpan.Start, otherwise i.iter would've
   222  	// returned that span instead.
   223  	if i.comparer.Compare(i.iterSpan.Start, key) > 0 {
   224  		return i.defragmentForward()
   225  	}
   226  
   227  	// The span we landed on has a Start bound ≤ key. There may be additional
   228  	// fragments before this span. Defragment backward to find the start of the
   229  	// defragmented span.
   230  	i.defragmentBackward()
   231  
   232  	// Defragmenting backward may have stopped because it encountered an error.
   233  	// If so, we must not continue so that i.iter.Error() (and thus i.Error())
   234  	// yields the error.
   235  	if i.iterSpan == nil && i.iter.Error() != nil {
   236  		return nil
   237  	}
   238  
   239  	if i.iterPos == iterPosPrev {
   240  		// Next once back onto the span.
   241  		i.iterSpan = i.iter.Next()
   242  	}
   243  	// Defragment the full span from its start.
   244  	return i.defragmentForward()
   245  }
   246  
   247  // SeekLT moves the iterator to the last span covering a key less than the
   248  // given key. This is equivalent to seeking to the last span with a start
   249  // key less than the given key.
   250  func (i *DefragmentingIter) SeekLT(key []byte) *Span {
   251  	i.iterSpan = i.iter.SeekLT(key)
   252  	if i.iterSpan == nil {
   253  		i.iterPos = iterPosCurr
   254  		return nil
   255  	} else if i.iterSpan.Empty() {
   256  		i.iterPos = iterPosCurr
   257  		return i.iterSpan
   258  	}
   259  	// If the span ends strictly before key, we know there mustn't be a later
   260  	// span that starts at i.iterSpan.End, otherwise i.iter would've returned
   261  	// that span instead.
   262  	if i.comparer.Compare(i.iterSpan.End, key) < 0 {
   263  		return i.defragmentBackward()
   264  	}
   265  
   266  	// The span we landed on has a End bound ≥ key. There may be additional
   267  	// fragments after this span. Defragment forward to find the end of the
   268  	// defragmented span.
   269  	i.defragmentForward()
   270  
   271  	// Defragmenting forward may have stopped because it encountered an error.
   272  	// If so, we must not continue so that i.iter.Error() (and thus i.Error())
   273  	// yields the error.
   274  	if i.iterSpan == nil && i.iter.Error() != nil {
   275  		return nil
   276  	}
   277  
   278  	if i.iterPos == iterPosNext {
   279  		// Prev once back onto the span.
   280  		i.iterSpan = i.iter.Prev()
   281  	}
   282  	// Defragment the full span from its end.
   283  	return i.defragmentBackward()
   284  }
   285  
   286  // First seeks the iterator to the first span and returns it.
   287  func (i *DefragmentingIter) First() *Span {
   288  	i.iterSpan = i.iter.First()
   289  	if i.iterSpan == nil {
   290  		i.iterPos = iterPosCurr
   291  		return nil
   292  	}
   293  	return i.defragmentForward()
   294  }
   295  
   296  // Last seeks the iterator to the last span and returns it.
   297  func (i *DefragmentingIter) Last() *Span {
   298  	i.iterSpan = i.iter.Last()
   299  	if i.iterSpan == nil {
   300  		i.iterPos = iterPosCurr
   301  		return nil
   302  	}
   303  	return i.defragmentBackward()
   304  }
   305  
   306  // Next advances to the next span and returns it.
   307  func (i *DefragmentingIter) Next() *Span {
   308  	switch i.iterPos {
   309  	case iterPosPrev:
   310  		// Switching directions; The iterator is currently positioned over the
   311  		// last span of the previous set of fragments. In the below diagram,
   312  		// the iterator is positioned over the last span that contributes to
   313  		// the defragmented x position. We want to be positioned over the first
   314  		// span that contributes to the z position.
   315  		//
   316  		//   x x x y y y z z z
   317  		//       ^       ^
   318  		//      old     new
   319  		//
   320  		// Next once to move onto y, defragment forward to land on the first z
   321  		// position.
   322  		i.iterSpan = i.iter.Next()
   323  		if invariants.Enabled && i.iterSpan == nil && i.iter.Error() == nil {
   324  			panic("pebble: invariant violation: no next span while switching directions")
   325  		}
   326  		// We're now positioned on the first span that was defragmented into the
   327  		// current iterator position. Skip over the rest of the current iterator
   328  		// position's constitutent fragments. In the above example, this would
   329  		// land on the first 'z'.
   330  		i.defragmentForward()
   331  		if i.iterSpan == nil {
   332  			i.iterPos = iterPosCurr
   333  			return nil
   334  		}
   335  
   336  		// Now that we're positioned over the first of the next set of
   337  		// fragments, defragment forward.
   338  		return i.defragmentForward()
   339  	case iterPosCurr:
   340  		// iterPosCurr is only used when the iter is exhausted or when the iterator
   341  		// is at an empty span.
   342  		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
   343  			panic("pebble: invariant violation: iterPosCurr with valid iterSpan")
   344  		}
   345  
   346  		i.iterSpan = i.iter.Next()
   347  		if i.iterSpan == nil {
   348  			return nil
   349  		}
   350  		return i.defragmentForward()
   351  	case iterPosNext:
   352  		// Already at the next span.
   353  		if i.iterSpan == nil {
   354  			i.iterPos = iterPosCurr
   355  			return nil
   356  		}
   357  		return i.defragmentForward()
   358  	default:
   359  		panic("unreachable")
   360  	}
   361  }
   362  
   363  // Prev steps back to the previous span and returns it.
   364  func (i *DefragmentingIter) Prev() *Span {
   365  	switch i.iterPos {
   366  	case iterPosPrev:
   367  		// Already at the previous span.
   368  		if i.iterSpan == nil {
   369  			i.iterPos = iterPosCurr
   370  			return nil
   371  		}
   372  		return i.defragmentBackward()
   373  	case iterPosCurr:
   374  		// iterPosCurr is only used when the iter is exhausted or when the iterator
   375  		// is at an empty span.
   376  		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
   377  			panic("pebble: invariant violation: iterPosCurr with valid iterSpan")
   378  		}
   379  
   380  		i.iterSpan = i.iter.Prev()
   381  		if i.iterSpan == nil {
   382  			return nil
   383  		}
   384  		return i.defragmentBackward()
   385  	case iterPosNext:
   386  		// Switching directions; The iterator is currently positioned over the
   387  		// first fragment of the next set of fragments. In the below diagram,
   388  		// the iterator is positioned over the first span that contributes to
   389  		// the defragmented z position. We want to be positioned over the last
   390  		// span that contributes to the x position.
   391  		//
   392  		//   x x x y y y z z z
   393  		//       ^       ^
   394  		//      new     old
   395  		//
   396  		// Prev once to move onto y, defragment backward to land on the last x
   397  		// position.
   398  		i.iterSpan = i.iter.Prev()
   399  		if invariants.Enabled && i.iterSpan == nil && i.iter.Error() == nil {
   400  			panic("pebble: invariant violation: no previous span while switching directions")
   401  		}
   402  		// We're now positioned on the last span that was defragmented into the
   403  		// current iterator position. Skip over the rest of the current iterator
   404  		// position's constitutent fragments. In the above example, this would
   405  		// land on the last 'x'.
   406  		i.defragmentBackward()
   407  
   408  		// Now that we're positioned over the last of the prev set of
   409  		// fragments, defragment backward.
   410  		if i.iterSpan == nil {
   411  			i.iterPos = iterPosCurr
   412  			return nil
   413  		}
   414  		return i.defragmentBackward()
   415  	default:
   416  		panic("unreachable")
   417  	}
   418  }
   419  
   420  // checkEqual checks the two spans for logical equivalence. It uses the passed-in
   421  // DefragmentMethod and ensures both spans are NOT empty; not defragmenting empty
   422  // spans is an optimization that lets us load fewer sstable blocks.
   423  func (i *DefragmentingIter) checkEqual(left, right *Span) bool {
   424  	return (!left.Empty() && !right.Empty()) && i.method.ShouldDefragment(i.equal, i.iterSpan, &i.curr)
   425  }
   426  
   427  // defragmentForward defragments spans in the forward direction, starting from
   428  // i.iter's current position. The span at the current position must be non-nil,
   429  // but may be Empty().
   430  func (i *DefragmentingIter) defragmentForward() *Span {
   431  	if i.iterSpan.Empty() {
   432  		// An empty span will never be equal to another span; see checkEqual for
   433  		// why. To avoid loading non-empty range keys further ahead by calling Next,
   434  		// return early.
   435  		i.iterPos = iterPosCurr
   436  		return i.iterSpan
   437  	}
   438  	i.saveCurrent()
   439  
   440  	i.iterPos = iterPosNext
   441  	i.iterSpan = i.iter.Next()
   442  	for i.iterSpan != nil {
   443  		if !i.equal(i.curr.End, i.iterSpan.Start) {
   444  			// Not a continuation.
   445  			break
   446  		}
   447  		if !i.checkEqual(i.iterSpan, &i.curr) {
   448  			// Not a continuation.
   449  			break
   450  		}
   451  		i.keyBuf = append(i.keyBuf[:0], i.iterSpan.End...)
   452  		i.curr.End = i.keyBuf
   453  		i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys)
   454  		i.iterSpan = i.iter.Next()
   455  	}
   456  	// i.iterSpan == nil
   457  	//
   458  	// The inner iterator may return nil when it encounters an error. If there
   459  	// was an error, we don't know whether there is another span we should
   460  	// defragment or not. Return nil so that the caller knows they should check
   461  	// Error().
   462  	if i.iter.Error() != nil {
   463  		return nil
   464  	}
   465  	i.curr.Keys = i.keysBuf
   466  	return &i.curr
   467  }
   468  
   469  // defragmentBackward defragments spans in the backward direction, starting from
   470  // i.iter's current position. The span at the current position must be non-nil,
   471  // but may be Empty().
   472  func (i *DefragmentingIter) defragmentBackward() *Span {
   473  	if i.iterSpan.Empty() {
   474  		// An empty span will never be equal to another span; see checkEqual for
   475  		// why. To avoid loading non-empty range keys further ahead by calling Next,
   476  		// return early.
   477  		i.iterPos = iterPosCurr
   478  		return i.iterSpan
   479  	}
   480  	i.saveCurrent()
   481  
   482  	i.iterPos = iterPosPrev
   483  	i.iterSpan = i.iter.Prev()
   484  	for i.iterSpan != nil {
   485  		if !i.equal(i.curr.Start, i.iterSpan.End) {
   486  			// Not a continuation.
   487  			break
   488  		}
   489  		if !i.checkEqual(i.iterSpan, &i.curr) {
   490  			// Not a continuation.
   491  			break
   492  		}
   493  		i.keyBuf = append(i.keyBuf[:0], i.iterSpan.Start...)
   494  		i.curr.Start = i.keyBuf
   495  		i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys)
   496  		i.iterSpan = i.iter.Prev()
   497  	}
   498  	// i.iterSpan == nil
   499  	//
   500  	// The inner iterator may return nil when it encounters an error. If there
   501  	// was an error, we don't know whether there is another span we should
   502  	// defragment or not. Return nil so that the caller knows they should check
   503  	// Error().
   504  	if i.iter.Error() != nil {
   505  		return nil
   506  	}
   507  	i.curr.Keys = i.keysBuf
   508  	return &i.curr
   509  }
   510  
   511  func (i *DefragmentingIter) saveCurrent() {
   512  	i.currBuf.Reset()
   513  	i.keysBuf = i.keysBuf[:0]
   514  	i.keyBuf = i.keyBuf[:0]
   515  	if i.iterSpan == nil {
   516  		return
   517  	}
   518  	i.curr = Span{
   519  		Start:     i.saveBytes(i.iterSpan.Start),
   520  		End:       i.saveBytes(i.iterSpan.End),
   521  		KeysOrder: i.iterSpan.KeysOrder,
   522  	}
   523  	for j := range i.iterSpan.Keys {
   524  		i.keysBuf = append(i.keysBuf, Key{
   525  			Trailer: i.iterSpan.Keys[j].Trailer,
   526  			Suffix:  i.saveBytes(i.iterSpan.Keys[j].Suffix),
   527  			Value:   i.saveBytes(i.iterSpan.Keys[j].Value),
   528  		})
   529  	}
   530  	i.curr.Keys = i.keysBuf
   531  }
   532  
   533  func (i *DefragmentingIter) saveBytes(b []byte) []byte {
   534  	if b == nil {
   535  		return nil
   536  	}
   537  	i.currBuf, b = i.currBuf.Copy(b)
   538  	return b
   539  }