github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/defragment.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  
    10  	"github.com/zuoyebang/bitalostable/internal/base"
    11  	"github.com/zuoyebang/bitalostable/internal/invariants"
    12  )
    13  
// bufferReuseMaxCapacity is the maximum capacity of a DefragmentingIter buffer
// that DefragmentingIter will reuse. saveCurrent drops (sets to nil) any of
// its byte buffers whose capacity exceeds this value, so that a single very
// large key does not pin a large allocation for the lifetime of the iterator;
// the buffer is then reallocated as necessary.
const bufferReuseMaxCapacity = 10 << 10 // 10 KB
    18  
// DefragmentMethod configures the defragmentation performed by the
// DefragmentingIter. It decides whether two spans carry "equal state" and may
// therefore be merged; the DefragmentingIter itself checks that the spans'
// bounds abut before consulting the method.
type DefragmentMethod interface {
	// ShouldDefragment takes two abutting spans and returns whether the two
	// spans should be combined into a single, defragmented Span. The equal
	// argument is the user key equality function of the iterator's comparer.
	ShouldDefragment(equal base.Equal, left, right *Span) bool
}
    26  
// The DefragmentMethodFunc type is an adapter to allow the use of ordinary
// functions as DefragmentMethods. If f is a function with the appropriate
// signature, DefragmentMethodFunc(f) is a DefragmentMethod that calls f.
type DefragmentMethodFunc func(equal base.Equal, left, right *Span) bool

// ShouldDefragment calls f(equal, left, right) and returns its result
// unchanged, satisfying the DefragmentMethod interface.
func (f DefragmentMethodFunc) ShouldDefragment(equal base.Equal, left, right *Span) bool {
	return f(equal, left, right)
}
    36  
    37  // DefragmentInternal configures a DefragmentingIter to defragment spans
    38  // only if they have identical keys. It requires spans' keys to be sorted in
    39  // trailer descending order.
    40  //
    41  // This defragmenting method is intended for use in compactions that may see
    42  // internal range keys fragments that may now be joined, because the state that
    43  // required their fragmentation has been dropped.
    44  var DefragmentInternal DefragmentMethod = DefragmentMethodFunc(func(equal base.Equal, a, b *Span) bool {
    45  	if a.KeysOrder != ByTrailerDesc || b.KeysOrder != ByTrailerDesc {
    46  		panic("bitalostable: span keys unexpectedly not in trailer descending order")
    47  	}
    48  	if len(a.Keys) != len(b.Keys) {
    49  		return false
    50  	}
    51  	for i := range a.Keys {
    52  		if a.Keys[i].Trailer != b.Keys[i].Trailer {
    53  			return false
    54  		}
    55  		if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) {
    56  			return false
    57  		}
    58  		if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) {
    59  			return false
    60  		}
    61  	}
    62  	return true
    63  })
    64  
// DefragmentReducer merges the current and next Key slices, returning a new Key
// slice.
//
// The DefragmentingIter passes its own reusable key buffer as `cur` and stores
// the returned slice back into that buffer, so `cur` may be retained and
// mutated by the iterator afterwards. Implementations should therefore either
// modify and return `cur` in place (which saves allocations) or allocate and
// return a fresh slice. The `next` slice must not be mutated.
//
// The incoming slices are sorted by (SeqNum, Kind) descending. The output slice
// must also have this sort order.
type DefragmentReducer func(cur, next []Key) []Key
    75  
// StaticDefragmentReducer is a no-op DefragmentReducer that simply returns the
// current key slice, effectively retaining the first set of keys encountered
// for a defragmented span. The keys of every subsequent fragment are ignored.
//
// This reducer can be used, for example, when the set of Keys for each Span
// being reduced is not expected to change, and therefore the keys from the
// first span encountered can be used without considering keys in subsequent
// spans.
var StaticDefragmentReducer DefragmentReducer = func(cur, _ []Key) []Key {
	return cur
}
    87  
// iterPos is an enum indicating the position of the defragmenting iter's
// wrapped iter relative to the defragmented span most recently returned. The
// defragmenting iter must look ahead or behind when defragmenting forward or
// backwards respectively, and this enum records that current position.
type iterPos int8

const (
	// iterPosPrev is set by defragmentBackward: the wrapped iter has been
	// stepped backwards past the fragments making up the current span.
	iterPosPrev iterPos = -1
	// iterPosCurr is used when the wrapped iter is exhausted or is positioned
	// at an empty span, i.e. no look-ahead or look-behind was performed.
	iterPosCurr iterPos = 0
	// iterPosNext is set by defragmentForward: the wrapped iter has been
	// stepped forwards past the fragments making up the current span.
	iterPosNext iterPos = +1
)
    99  
// DefragmentingIter wraps a key span iterator, defragmenting physical
// fragmentation during iteration.
//
// During flushes and compactions, keys applied over a span may be split at
// sstable boundaries. This fragmentation can produce internal key bounds that
// do not match any of the bounds ever supplied to a user operation. This
// physical fragmentation is necessary to avoid excessively wide sstables.
//
// The defragmenting iterator undoes this physical fragmentation, joining spans
// with abutting bounds and equal state. The defragmenting iterator takes a
// DefragmentMethod to determine what is "equal state" for a span. The
// DefragmentMethod is an interface (DefragmentMethodFunc adapts a plain
// function to it), allowing arbitrary comparisons between Span keys.
//
// Seeking (SeekGE, SeekLT) poses an obstacle to defragmentation. A seek may
// land on a physical fragment in the middle of several fragments that must be
// defragmented. A seek first defragments in the opposite direction of
// iteration to find the beginning of the defragmented span, and then
// defragments in the iteration direction, ensuring it's found a whole
// defragmented span.
type DefragmentingIter struct {
	// comparer is the comparer supplied to Init and equal is its Equal
	// function, used to test whether two fragments' bounds abut. iter is the
	// wrapped fragment iterator, iterSpan is the span most recently returned
	// by iter, and iterPos records where iter sits relative to curr.
	comparer *base.Comparer
	equal    base.Equal
	iter     FragmentIterator
	iterSpan *Span
	iterPos  iterPos

	// curr holds the span at the current iterator position. currBuf is a buffer
	// for use when copying user keys for curr. keysBuf is a buffer for use when
	// copying Keys for curr. currBuf is cleared between positioning methods.
	//
	// keyBuf is a buffer specifically for the defragmented start key when
	// defragmenting backwards or the defragmented end key when defragmenting
	// forwards. These bounds are overwritten repeatedly during defragmentation,
	// and the defragmentation routines overwrite keyBuf repeatedly to store
	// these extended bounds.
	curr    Span
	currBuf []byte
	keysBuf []Key
	keyBuf  []byte

	// method is a comparison function for two spans. method is called when two
	// spans are abutting to determine whether they may be defragmented.
	// method does not itself check for adjacency for the two spans.
	method DefragmentMethod

	// reduce is the reducer function used to collect Keys across all spans that
	// constitute a defragmented span.
	reduce DefragmentReducer
}

// Assert that *DefragmentingIter implements the FragmentIterator interface.
var _ FragmentIterator = (*DefragmentingIter)(nil)
   153  
   154  // Init initializes the defragmenting iter using the provided defragment
   155  // method.
   156  func (i *DefragmentingIter) Init(
   157  	comparer *base.Comparer, iter FragmentIterator, equal DefragmentMethod, reducer DefragmentReducer,
   158  ) {
   159  	*i = DefragmentingIter{
   160  		comparer: comparer,
   161  		equal:    comparer.Equal,
   162  		iter:     iter,
   163  		method:   equal,
   164  		reduce:   reducer,
   165  	}
   166  }
   167  
// Error returns any accumulated error. The DefragmentingIter tracks no error
// state of its own; it reports the wrapped iterator's error.
func (i *DefragmentingIter) Error() error {
	return i.iter.Error()
}
   172  
// Close closes the underlying iterators by closing the wrapped iterator and
// returning its result.
func (i *DefragmentingIter) Close() error {
	return i.iter.Close()
}
   177  
   178  // SeekGE seeks the iterator to the first span with a start key greater than or
   179  // equal to key and returns it.
   180  func (i *DefragmentingIter) SeekGE(key []byte) *Span {
   181  	i.iterSpan = i.iter.SeekGE(key)
   182  	if i.iterSpan == nil {
   183  		i.iterPos = iterPosCurr
   184  		return nil
   185  	} else if i.iterSpan.Empty() {
   186  		i.iterPos = iterPosCurr
   187  		return i.iterSpan
   188  	}
   189  	// Save the current span and peek backwards.
   190  	i.saveCurrent()
   191  	i.iterSpan = i.iter.Prev()
   192  	if i.iterSpan != nil && i.equal(i.curr.Start, i.iterSpan.End) && i.checkEqual(i.iterSpan, &i.curr) {
   193  		// A continuation. The span we originally landed on and defragmented
   194  		// backwards has a true Start key < key. To obey the FragmentIterator
   195  		// contract, we must not return this defragmented span. Defragment
   196  		// forward to finish defragmenting the span in the forward direction.
   197  		i.defragmentForward()
   198  
   199  		// Now we must be on a span that truly has a defragmented Start key >
   200  		// key.
   201  		return i.defragmentForward()
   202  	}
   203  
   204  	// The span previous to i.curr does not defragment, so we should return it.
   205  	// Next the underlying iterator back onto the span we previously saved to
   206  	// i.curr and then defragment forward.
   207  	i.iterSpan = i.iter.Next()
   208  	return i.defragmentForward()
   209  }
   210  
   211  // SeekLT seeks the iterator to the last span with a start key less than
   212  // key and returns it.
   213  func (i *DefragmentingIter) SeekLT(key []byte) *Span {
   214  	i.iterSpan = i.iter.SeekLT(key)
   215  	if i.iterSpan == nil {
   216  		i.iterPos = iterPosCurr
   217  		return nil
   218  	} else if i.iterSpan.Empty() {
   219  		i.iterPos = iterPosCurr
   220  		return i.iterSpan
   221  	}
   222  	// Defragment forward to find the end of the defragmented span.
   223  	i.defragmentForward()
   224  	if i.iterPos == iterPosNext {
   225  		// Prev once back onto the span.
   226  		i.iterSpan = i.iter.Prev()
   227  	}
   228  	// Defragment the full span from its end.
   229  	return i.defragmentBackward()
   230  }
   231  
   232  // First seeks the iterator to the first span and returns it.
   233  func (i *DefragmentingIter) First() *Span {
   234  	i.iterSpan = i.iter.First()
   235  	if i.iterSpan == nil {
   236  		i.iterPos = iterPosCurr
   237  		return nil
   238  	}
   239  	return i.defragmentForward()
   240  }
   241  
   242  // Last seeks the iterator to the last span and returns it.
   243  func (i *DefragmentingIter) Last() *Span {
   244  	i.iterSpan = i.iter.Last()
   245  	if i.iterSpan == nil {
   246  		i.iterPos = iterPosCurr
   247  		return nil
   248  	}
   249  	return i.defragmentBackward()
   250  }
   251  
   252  // Next advances to the next span and returns it.
   253  func (i *DefragmentingIter) Next() *Span {
   254  	switch i.iterPos {
   255  	case iterPosPrev:
   256  		// Switching directions; The iterator is currently positioned over the
   257  		// last span of the previous set of fragments. In the below diagram,
   258  		// the iterator is positioned over the last span that contributes to
   259  		// the defragmented x position. We want to be positioned over the first
   260  		// span that contributes to the z position.
   261  		//
   262  		//   x x x y y y z z z
   263  		//       ^       ^
   264  		//      old     new
   265  		//
   266  		// Next once to move onto y, defragment forward to land on the first z
   267  		// position.
   268  		i.iterSpan = i.iter.Next()
   269  		if invariants.Enabled && i.iterSpan == nil {
   270  			panic("bitalostable: invariant violation: no next span while switching directions")
   271  		}
   272  		// We're now positioned on the first span that was defragmented into the
   273  		// current iterator position. Skip over the rest of the current iterator
   274  		// position's constitutent fragments. In the above example, this would
   275  		// land on the first 'z'.
   276  		i.defragmentForward()
   277  		if i.iterSpan == nil {
   278  			i.iterPos = iterPosCurr
   279  			return nil
   280  		}
   281  
   282  		// Now that we're positioned over the first of the next set of
   283  		// fragments, defragment forward.
   284  		return i.defragmentForward()
   285  	case iterPosCurr:
   286  		// iterPosCurr is only used when the iter is exhausted or when the iterator
   287  		// is at an empty span.
   288  		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
   289  			panic("bitalostable: invariant violation: iterPosCurr with valid iterSpan")
   290  		}
   291  
   292  		i.iterSpan = i.iter.Next()
   293  		if i.iterSpan == nil {
   294  			return nil
   295  		}
   296  		return i.defragmentForward()
   297  	case iterPosNext:
   298  		// Already at the next span.
   299  		if i.iterSpan == nil {
   300  			i.iterPos = iterPosCurr
   301  			return nil
   302  		}
   303  		return i.defragmentForward()
   304  	default:
   305  		panic("unreachable")
   306  	}
   307  }
   308  
   309  // Prev steps back to the previous span and returns it.
   310  func (i *DefragmentingIter) Prev() *Span {
   311  	switch i.iterPos {
   312  	case iterPosPrev:
   313  		// Already at the previous span.
   314  		if i.iterSpan == nil {
   315  			i.iterPos = iterPosCurr
   316  			return nil
   317  		}
   318  		return i.defragmentBackward()
   319  	case iterPosCurr:
   320  		// iterPosCurr is only used when the iter is exhausted or when the iterator
   321  		// is at an empty span.
   322  		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
   323  			panic("bitalostable: invariant violation: iterPosCurr with valid iterSpan")
   324  		}
   325  
   326  		i.iterSpan = i.iter.Prev()
   327  		if i.iterSpan == nil {
   328  			return nil
   329  		}
   330  		return i.defragmentBackward()
   331  	case iterPosNext:
   332  		// Switching directions; The iterator is currently positioned over the
   333  		// first fragment of the next set of fragments. In the below diagram,
   334  		// the iterator is positioned over the first span that contributes to
   335  		// the defragmented z position. We want to be positioned over the last
   336  		// span that contributes to the x position.
   337  		//
   338  		//   x x x y y y z z z
   339  		//       ^       ^
   340  		//      new     old
   341  		//
   342  		// Prev once to move onto y, defragment backward to land on the last x
   343  		// position.
   344  		i.iterSpan = i.iter.Prev()
   345  		if invariants.Enabled && i.iterSpan == nil {
   346  			panic("bitalostable: invariant violation: no previous span while switching directions")
   347  		}
   348  		// We're now positioned on the last span that was defragmented into the
   349  		// current iterator position. Skip over the rest of the current iterator
   350  		// position's constitutent fragments. In the above example, this would
   351  		// land on the last 'x'.
   352  		i.defragmentBackward()
   353  
   354  		// Now that we're positioned over the last of the prev set of
   355  		// fragments, defragment backward.
   356  		if i.iterSpan == nil {
   357  			i.iterPos = iterPosCurr
   358  			return nil
   359  		}
   360  		return i.defragmentBackward()
   361  	default:
   362  		panic("unreachable")
   363  	}
   364  }
   365  
   366  // checkEqual checks the two spans for logical equivalence. It uses the passed-in
   367  // DefragmentMethod and ensures both spans are NOT empty; not defragmenting empty
   368  // spans is an optimization that lets us load fewer sstable blocks.
   369  func (i *DefragmentingIter) checkEqual(left, right *Span) bool {
   370  	return (!left.Empty() && !right.Empty()) && i.method.ShouldDefragment(i.equal, i.iterSpan, &i.curr)
   371  }
   372  
   373  // defragmentForward defragments spans in the forward direction, starting from
   374  // i.iter's current position. The span at the current position must be non-nil,
   375  // but may be Empty().
   376  func (i *DefragmentingIter) defragmentForward() *Span {
   377  	if i.iterSpan.Empty() {
   378  		// An empty span will never be equal to another span; see checkEqual for
   379  		// why. To avoid loading non-empty range keys further ahead by calling Next,
   380  		// return early.
   381  		i.iterPos = iterPosCurr
   382  		return i.iterSpan
   383  	}
   384  	i.saveCurrent()
   385  
   386  	i.iterPos = iterPosNext
   387  	i.iterSpan = i.iter.Next()
   388  	for i.iterSpan != nil {
   389  		if !i.equal(i.curr.End, i.iterSpan.Start) {
   390  			// Not a continuation.
   391  			break
   392  		}
   393  		if !i.checkEqual(i.iterSpan, &i.curr) {
   394  			// Not a continuation.
   395  			break
   396  		}
   397  		i.keyBuf = append(i.keyBuf[:0], i.iterSpan.End...)
   398  		i.curr.End = i.keyBuf
   399  		i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys)
   400  		i.iterSpan = i.iter.Next()
   401  	}
   402  	i.curr.Keys = i.keysBuf
   403  	return &i.curr
   404  }
   405  
   406  // defragmentBackward defragments spans in the backward direction, starting from
   407  // i.iter's current position. The span at the current position must be non-nil,
   408  // but may be Empty().
   409  func (i *DefragmentingIter) defragmentBackward() *Span {
   410  	if i.iterSpan.Empty() {
   411  		// An empty span will never be equal to another span; see checkEqual for
   412  		// why. To avoid loading non-empty range keys further ahead by calling Next,
   413  		// return early.
   414  		i.iterPos = iterPosCurr
   415  		return i.iterSpan
   416  	}
   417  	i.saveCurrent()
   418  
   419  	i.iterPos = iterPosPrev
   420  	i.iterSpan = i.iter.Prev()
   421  	for i.iterSpan != nil {
   422  		if !i.equal(i.curr.Start, i.iterSpan.End) {
   423  			// Not a continuation.
   424  			break
   425  		}
   426  		if !i.checkEqual(i.iterSpan, &i.curr) {
   427  			// Not a continuation.
   428  			break
   429  		}
   430  		i.keyBuf = append(i.keyBuf[:0], i.iterSpan.Start...)
   431  		i.curr.Start = i.keyBuf
   432  		i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys)
   433  		i.iterSpan = i.iter.Prev()
   434  	}
   435  	i.curr.Keys = i.keysBuf
   436  	return &i.curr
   437  }
   438  
   439  func (i *DefragmentingIter) saveCurrent() {
   440  	i.currBuf = i.currBuf[:0]
   441  	i.keysBuf = i.keysBuf[:0]
   442  	i.keyBuf = i.keyBuf[:0]
   443  	if cap(i.currBuf) > bufferReuseMaxCapacity {
   444  		i.currBuf = nil
   445  	}
   446  	if cap(i.keyBuf) > bufferReuseMaxCapacity {
   447  		i.keyBuf = nil
   448  	}
   449  	if i.iterSpan == nil {
   450  		return
   451  	}
   452  	i.curr = Span{
   453  		Start:     i.saveBytes(i.iterSpan.Start),
   454  		End:       i.saveBytes(i.iterSpan.End),
   455  		KeysOrder: i.iterSpan.KeysOrder,
   456  	}
   457  	for j := range i.iterSpan.Keys {
   458  		i.keysBuf = append(i.keysBuf, Key{
   459  			Trailer: i.iterSpan.Keys[j].Trailer,
   460  			Suffix:  i.saveBytes(i.iterSpan.Keys[j].Suffix),
   461  			Value:   i.saveBytes(i.iterSpan.Keys[j].Value),
   462  		})
   463  	}
   464  	i.curr.Keys = i.keysBuf
   465  }
   466  
   467  func (i *DefragmentingIter) saveBytes(b []byte) []byte {
   468  	if b == nil {
   469  		return nil
   470  	}
   471  	ret := append(i.currBuf, b...)
   472  	i.currBuf = ret[len(ret):]
   473  	return ret
   474  }