github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/rangekey/coalesce.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package rangekey
     6  
     7  import (
     8  	"bytes"
     9  	"sort"
    10  
    11  	"github.com/zuoyebang/bitalostable/internal/base"
    12  	"github.com/zuoyebang/bitalostable/internal/invariants"
    13  	"github.com/zuoyebang/bitalostable/internal/keyspan"
    14  	"github.com/zuoyebang/bitalostable/internal/manifest"
    15  )
    16  
// UserIteratorConfig holds state for constructing the range key iterator stack
// for user iteration. Init wires the embedded iterators together: the merging
// iterator is wrapped by the bounded iterator, which in turn is wrapped by the
// defragmenting iterator that Init returns.
type UserIteratorConfig struct {
	// snapshot is the sequence number supplied to Init. Transform uses it to
	// restrict each span to the keys visible at that sequence number.
	snapshot   uint64
	// comparer is the comparer supplied to Init. ShouldDefragment uses its
	// Compare function for ordering checks when invariants are enabled.
	comparer   *base.Comparer
	// miter merges the fragment iterators passed to Init and AddLevel, with
	// this config installed as its span transformer.
	miter      keyspan.MergingIter
	// biter wraps miter and carries the lower/upper bounds and prefix state.
	// SetBounds forwards to it.
	biter      keyspan.BoundedIter
	// diter wraps biter, with this config installed as its defragmentation
	// method. Init returns a pointer to it.
	diter      keyspan.DefragmentingIter
	// liters is a fixed pool of level iterators handed out by NewLevelIter.
	liters     [manifest.NumLevels]keyspan.LevelIter
	// litersUsed counts how many entries of liters have been handed out since
	// the last call to Init.
	litersUsed int
	// sortBuf is the scratch state Transform passes to coalesce. It is
	// re-created on every Transform call on top of the destination span's key
	// slice.
	sortBuf    keysBySuffix
}
    29  
    30  // Init initializes the range key iterator stack for user iteration. The
    31  // resulting fragment iterator applies range key semantics, defragments spans
    32  // according to their user-observable state and removes all Keys other than
    33  // RangeKeySets describing the current state of range keys. The resulting spans
    34  // contain Keys sorted by Suffix.
    35  //
    36  // The snapshot sequence number parameter determines which keys are visible. Any
    37  // keys not visible at the provided snapshot are ignored.
    38  func (ui *UserIteratorConfig) Init(
    39  	comparer *base.Comparer,
    40  	snapshot uint64,
    41  	lower, upper []byte,
    42  	hasPrefix *bool,
    43  	prefix *[]byte,
    44  	iters ...keyspan.FragmentIterator,
    45  ) keyspan.FragmentIterator {
    46  	ui.snapshot = snapshot
    47  	ui.comparer = comparer
    48  	ui.miter.Init(comparer.Compare, ui, iters...)
    49  	ui.biter.Init(comparer.Compare, comparer.Split, &ui.miter, lower, upper, hasPrefix, prefix)
    50  	ui.diter.Init(comparer, &ui.biter, ui, keyspan.StaticDefragmentReducer)
    51  	ui.litersUsed = 0
    52  	return &ui.diter
    53  }
    54  
// AddLevel adds a new level to the bottom of the iterator stack by forwarding
// iter to the merging iterator. AddLevel must be called after Init and before
// any other method on the iterator.
func (ui *UserIteratorConfig) AddLevel(iter keyspan.FragmentIterator) {
	ui.miter.AddLevel(iter)
}
    60  
    61  // NewLevelIter returns a pointer to a newly allocated or reused
    62  // keyspan.LevelIter. The caller is responsible for calling Init() on this
    63  // instance.
    64  func (ui *UserIteratorConfig) NewLevelIter() *keyspan.LevelIter {
    65  	if ui.litersUsed >= len(ui.liters) {
    66  		return &keyspan.LevelIter{}
    67  	}
    68  	ui.litersUsed++
    69  	return &ui.liters[ui.litersUsed-1]
    70  }
    71  
// SetBounds propagates bounds to the iterator stack by forwarding lower and
// upper to the bounded iterator. The fragment iterator interface ordinarily
// doesn't enforce bounds, so this is exposed as an explicit method on the user
// iterator config.
func (ui *UserIteratorConfig) SetBounds(lower, upper []byte) {
	ui.biter.SetBounds(lower, upper)
}
    78  
    79  // Transform implements the keyspan.Transformer interface for use with a
    80  // keyspan.MergingIter. It transforms spans by resolving range keys at the
    81  // provided snapshot sequence number. Shadowing of keys is resolved (eg, removal
    82  // of unset keys, removal of keys overwritten by a set at the same suffix, etc)
    83  // and then non-RangeKeySet keys are removed. The resulting transformed spans
    84  // only contain RangeKeySets describing the state visible at the provided
    85  // sequence number, and hold their Keys sorted by Suffix.
    86  func (ui *UserIteratorConfig) Transform(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error {
    87  	// Apply shadowing of keys.
    88  	dst.Start = s.Start
    89  	dst.End = s.End
    90  	ui.sortBuf = keysBySuffix{
    91  		cmp:  cmp,
    92  		keys: dst.Keys[:0],
    93  	}
    94  	if err := coalesce(&ui.sortBuf, s.Visible(ui.snapshot).Keys, &dst.Keys); err != nil {
    95  		return err
    96  	}
    97  	// During user iteration over range keys, unsets and deletes don't
    98  	// matter. Remove them. This step helps logical defragmentation during
    99  	// iteration.
   100  	keys := dst.Keys
   101  	dst.Keys = dst.Keys[:0]
   102  	for i := range keys {
   103  		switch keys[i].Kind() {
   104  		case base.InternalKeyKindRangeKeySet:
   105  			if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
   106  				panic("bitalostable: keys unexpectedly not in ascending suffix order")
   107  			}
   108  			dst.Keys = append(dst.Keys, keys[i])
   109  		case base.InternalKeyKindRangeKeyUnset:
   110  			if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 {
   111  				panic("bitalostable: keys unexpectedly not in ascending suffix order")
   112  			}
   113  			// Skip.
   114  			continue
   115  		case base.InternalKeyKindRangeKeyDelete:
   116  			// Skip.
   117  			continue
   118  		default:
   119  			return base.CorruptionErrorf("bitalostable: unrecognized range key kind %s", keys[i].Kind())
   120  		}
   121  	}
   122  	// coalesce results in dst.Keys being sorted by Suffix.
   123  	dst.KeysOrder = keyspan.BySuffixAsc
   124  	return nil
   125  }
   126  
   127  // ShouldDefragment implements the DefragmentMethod interface and configures a
   128  // DefragmentingIter to defragment spans of range keys if their user-visible
   129  // state is identical. This defragmenting method assumes the provided spans have
   130  // already been transformed through (UserIterationConfig).Transform, so all
   131  // RangeKeySets are user-visible sets and are already in Suffix order. This
   132  // defragmenter checks for equality between set suffixes and values (ignoring
   133  // sequence numbers). It's intended for use during user iteration, when the
   134  // wrapped keyspan iterator is merging spans across all levels of the LSM.
   135  func (ui *UserIteratorConfig) ShouldDefragment(equal base.Equal, a, b *keyspan.Span) bool {
   136  	// This implementation must only be used on spans that have transformed by
   137  	// ui.Transform. The transform applies shadowing, removes all keys besides
   138  	// the resulting Sets and sorts the keys by suffix. Since shadowing has been
   139  	// applied, each Set must set a unique suffix. If the two spans are
   140  	// equivalent, they must have the same number of range key sets.
   141  	if len(a.Keys) != len(b.Keys) || len(a.Keys) == 0 {
   142  		return false
   143  	}
   144  	if a.KeysOrder != keyspan.BySuffixAsc || b.KeysOrder != keyspan.BySuffixAsc {
   145  		panic("bitalostable: range key span's keys unexpectedly not in ascending suffix order")
   146  	}
   147  
   148  	ret := true
   149  	for i := range a.Keys {
   150  		if invariants.Enabled {
   151  			if a.Keys[i].Kind() != base.InternalKeyKindRangeKeySet ||
   152  				b.Keys[i].Kind() != base.InternalKeyKindRangeKeySet {
   153  				panic("bitalostable: unexpected non-RangeKeySet during defragmentation")
   154  			}
   155  			if i > 0 && (ui.comparer.Compare(a.Keys[i].Suffix, a.Keys[i-1].Suffix) < 0 ||
   156  				ui.comparer.Compare(b.Keys[i].Suffix, b.Keys[i-1].Suffix) < 0) {
   157  				panic("bitalostable: range keys not ordered by suffix during defragmentation")
   158  			}
   159  		}
   160  		if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) {
   161  			ret = false
   162  			break
   163  		}
   164  		if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) {
   165  			ret = false
   166  			break
   167  		}
   168  	}
   169  	return ret
   170  }
   171  
   172  // Coalesce imposes range key semantics and coalesces range keys with the same
   173  // bounds. Coalesce drops any keys shadowed by more recent sets, unsets or
   174  // deletes. Coalesce modifies the provided span's Keys slice, reslicing the
   175  // slice to remove dropped keys.
   176  //
   177  // Coalescence has subtle behavior with respect to sequence numbers. Coalesce
   178  // depends on a keyspan.Span's Keys being sorted in sequence number descending
   179  // order. The first key has the largest sequence number. The returned coalesced
   180  // span includes only the largest sequence number. All other sequence numbers
   181  // are forgotten. When a compaction constructs output range keys from a
   182  // coalesced span, it produces at most one RANGEKEYSET, one RANGEKEYUNSET and
   183  // one RANGEKEYDEL. Each one of these keys adopt the largest sequence number.
   184  //
   185  // This has the potentially surprising effect of 'promoting' a key to a higher
   186  // sequence number. This is okay, because:
   187  //   - There are no other overlapping keys within the coalesced span of
   188  //     sequence numbers (otherwise they would be in the compaction, due to
   189  //     the LSM invariant).
   190  //   - Range key sequence numbers are never compared to point key sequence
   191  //     numbers. Range keys and point keys have parallel existences.
   192  //   - Compactions only coalesce within snapshot stripes.
   193  //
   194  // Additionally, internal range keys at the same sequence number have subtle
   195  // mechanics:
   196  //   - RANGEKEYSETs shadow RANGEKEYUNSETs of the same suffix.
   197  //   - RANGEKEYDELs only apply to keys at lower sequence numbers.
   198  //
   199  // This is required for ingestion. Ingested sstables are assigned a single
   200  // sequence number for the file, at which all of the file's keys are visible.
   201  // The RANGEKEYSET, RANGEKEYUNSET and RANGEKEYDEL key kinds are ordered such
   202  // that among keys with equal sequence numbers (thus ordered by their kinds) the
   203  // keys do not affect one another. Ingested sstables are expected to be
   204  // consistent with respect to the set/unset suffixes: A given suffix should be
   205  // set or unset but not both.
   206  //
   207  // The resulting dst Keys slice is sorted by Trailer.
   208  func Coalesce(cmp base.Compare, keys []keyspan.Key, dst *[]keyspan.Key) error {
   209  	// TODO(jackson): Currently, Coalesce doesn't actually perform the sequence
   210  	// number promotion described in the comment above.
   211  	keysBySuffix := keysBySuffix{
   212  		cmp:  cmp,
   213  		keys: (*dst)[:0],
   214  	}
   215  	if err := coalesce(&keysBySuffix, keys, dst); err != nil {
   216  		return err
   217  	}
   218  	// coalesce left the keys in *dst sorted by suffix. Re-sort them by trailer.
   219  	keyspan.SortKeysByTrailer(dst)
   220  	return nil
   221  }
   222  
   223  func coalesce(keysBySuffix *keysBySuffix, keys []keyspan.Key, dst *[]keyspan.Key) error {
   224  	var deleted bool
   225  	for i := 0; i < len(keys) && !deleted; i++ {
   226  		k := keys[i]
   227  		if invariants.Enabled && i > 0 && k.Trailer > keys[i-1].Trailer {
   228  			panic("bitalostable: invariant violation: span keys unordered")
   229  		}
   230  
   231  		// NB: Within a given sequence number, keys are ordered as:
   232  		//   RangeKeySet > RangeKeyUnset > RangeKeyDelete
   233  		// This is significant, because this ensures that none of the range keys
   234  		// sharing a sequence number shadow each other.
   235  		switch k.Kind() {
   236  		case base.InternalKeyKindRangeKeySet:
   237  			n := len(keysBySuffix.keys)
   238  
   239  			if keysBySuffix.get(n, k.Suffix) < n {
   240  				// This suffix is already set or unset at a higher sequence
   241  				// number. Skip.
   242  				continue
   243  			}
   244  			keysBySuffix.keys = append(keysBySuffix.keys, k)
   245  			sort.Sort(keysBySuffix)
   246  		case base.InternalKeyKindRangeKeyUnset:
   247  			n := len(keysBySuffix.keys)
   248  
   249  			if keysBySuffix.get(n, k.Suffix) < n {
   250  				// This suffix is already set or unset at a higher sequence
   251  				// number. Skip.
   252  				continue
   253  			}
   254  			keysBySuffix.keys = append(keysBySuffix.keys, k)
   255  			sort.Sort(keysBySuffix)
   256  		case base.InternalKeyKindRangeKeyDelete:
   257  			// All remaining range keys in this span have been deleted by this
   258  			// RangeKeyDelete. There's no need to continue looping, because all
   259  			// the remaining keys are shadowed by this one. The for loop
   260  			// condition will terminate when it sees the last key is a
   261  			// range key deletion.
   262  			keysBySuffix.keys = append(keysBySuffix.keys, k)
   263  			deleted = true
   264  		default:
   265  			return base.CorruptionErrorf("bitalostable: unexpected range key kind %s", k.Kind())
   266  		}
   267  	}
   268  
   269  	// Update the span with the (potentially reduced) keys slice.
   270  	// NB: We don't re-sort by Trailer. The exported Coalesce function however
   271  	// will.
   272  	*dst = keysBySuffix.keys
   273  	return nil
   274  }
   275  
// keysBySuffix is the working set used while coalescing: a slice of keys
// together with the comparison function used to order their suffixes. Its Len,
// Less and Swap methods let it be sorted by suffix with sort.Sort, and get
// looks a suffix up by binary search.
type keysBySuffix struct {
	// cmp compares two suffixes; it is used both for sorting and for lookups.
	cmp  base.Compare
	// keys is the accumulated set of keys.
	keys []keyspan.Key
}
   280  
   281  // get searches for suffix among the first n keys in keys. If the suffix is
   282  // found, it returns the index of the item with the suffix. If the suffix is not
   283  // found, it returns n.
   284  func (s *keysBySuffix) get(n int, suffix []byte) (i int) {
   285  	// Binary search for the suffix to see if there's an existing key with the
   286  	// suffix. Only binary search among the first n items. get is called while
   287  	// appending new keys with suffixes that may sort before existing keys.
   288  	// The n parameter indicates what portion of the keys slice is sorted and
   289  	// may contain relevant keys.
   290  
   291  	i = sort.Search(n, func(i int) bool {
   292  		return s.cmp(s.keys[i].Suffix, suffix) >= 0
   293  	})
   294  	if i < n && s.cmp(s.keys[i].Suffix, suffix) == 0 {
   295  		return i
   296  	}
   297  	return n
   298  }
   299  
   300  func (s *keysBySuffix) Len() int           { return len(s.keys) }
   301  func (s *keysBySuffix) Less(i, j int) bool { return s.cmp(s.keys[i].Suffix, s.keys[j].Suffix) < 0 }
   302  func (s *keysBySuffix) Swap(i, j int)      { s.keys[i], s.keys[j] = s.keys[j], s.keys[i] }