github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/merging_iter.go

github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/merging_iter.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"sort"
    11  
    12  	"github.com/zuoyebang/bitalostable/internal/base"
    13  	"github.com/zuoyebang/bitalostable/internal/invariants"
    14  	"github.com/zuoyebang/bitalostable/internal/manifest"
    15  )
    16  
    17  // TODO(jackson): Consider implementing an optimization to seek lower levels
    18  // past higher levels' RANGEKEYDELs. This would be analogous to the
    19  // optimization bitalostable.mergingIter performs for RANGEDELs during point key
    20  // seeks. It may not be worth it, because range keys are rare and cascading
    21  // seeks would require introducing key comparisons to switchTo{Min,Max}Heap
    22  // where there currently are none.
    23  
// Transformer defines a transformation to be applied to a Span.
type Transformer interface {
	// Transform takes a Span as input and writes the transformed Span to the
	// provided output *Span pointer. The output Span's Keys slice may be reused
	// by Transform to reduce allocations.
	//
	// cmp is the key comparison function supplied by the caller, in is the
	// span to transform and out is the destination that receives the result.
	// The returned error is propagated to the caller of Transform.
	Transform(cmp base.Compare, in Span, out *Span) error
}
    31  
// The TransformerFunc type is an adapter to allow the use of ordinary functions
// as Transformers. If f is a function with the appropriate signature,
// TransformerFunc(f) is a Transformer that calls f.
//
// The function receives the comparison function, the input Span and the
// destination *Span, in that order, and returns an error.
type TransformerFunc func(base.Compare, Span, *Span) error
    36  
    37  // Transform calls f(cmp, in, out).
    38  func (tf TransformerFunc) Transform(cmp base.Compare, in Span, out *Span) error {
    39  	return tf(cmp, in, out)
    40  }
    41  
    42  var noopTransform Transformer = TransformerFunc(func(_ base.Compare, s Span, dst *Span) error {
    43  	dst.Start, dst.End = s.Start, s.End
    44  	dst.Keys = append(dst.Keys[:0], s.Keys...)
    45  	return nil
    46  })
    47  
    48  // visibleTransform filters keys that are invisible at the provided snapshot
    49  // sequence number.
    50  func visibleTransform(snapshot uint64) Transformer {
    51  	return TransformerFunc(func(_ base.Compare, s Span, dst *Span) error {
    52  		dst.Start, dst.End = s.Start, s.End
    53  		dst.Keys = dst.Keys[:0]
    54  		for _, k := range s.Keys {
    55  			if base.Visible(k.SeqNum(), snapshot) {
    56  				dst.Keys = append(dst.Keys, k)
    57  			}
    58  		}
    59  		return nil
    60  	})
    61  }
    62  
    63  // MergingIter merges spans across levels of the LSM, exposing an iterator over
    64  // spans that yields sets of spans fragmented at unique user key boundaries.
    65  //
    66  // A MergingIter is initialized with an arbitrary number of child iterators over
    67  // fragmented spans. Each child iterator exposes fragmented key spans, such that
    68  // overlapping keys are surfaced in a single Span. Key spans from one child
    69  // iterator may overlap key spans from another child iterator arbitrarily.
    70  //
    71  // The spans combined by MergingIter will return spans with keys sorted by
    72  // trailer descending. If the MergingIter is configured with a Transformer, it's
    73  // permitted to modify the ordering of the spans' keys returned by MergingIter.
    74  //
    75  // # Algorithm
    76  //
    77  // The merging iterator wraps child iterators, merging and fragmenting spans
    78  // across levels. The high-level algorithm is:
    79  //
    80  //  1. Initialize the heap with bound keys from child iterators' spans.
    81  //  2. Find the next [or previous] two unique user keys from bounds.
    82  //  3. Consider the span formed between the two unique user keys a candidate
    83  //     span.
    84  //  4. Determine if any of the child iterators' spans overlap the candidate
    85  //     span.
    86  //     4a. If any of the child iterator's current bounds are end keys
    87  //     (during forward iteration) or start keys (during reverse
    88  //     iteration), then all the spans with that bound overlap the
    89  //     candidate span.
    90  //     4b. Apply the configured transform, which may remove keys.
    91  //     4c. If no spans overlap, forget the smallest (forward iteration)
    92  //     or largest (reverse iteration) unique user key and advance
    93  //     the iterators to the next unique user key. Start again from 3.
    94  //
    95  // # Detailed algorithm
    96  //
    97  // Each level (i0, i1, ...) has a user-provided input FragmentIterator. The
    98  // merging iterator steps through individual boundaries of the underlying
    99  // spans separately. If the underlying FragmentIterator has fragments
   100  // [a,b){#2,#1} [b,c){#1} the mergingIterLevel.{next,prev} step through:
   101  //
   102  //	(a, start), (b, end), (b, start), (c, end)
   103  //
   104  // Note that (a, start) and (b, end) are observed ONCE each, despite two keys
   105  // sharing those bounds. Also note that (b, end) and (b, start) are two distinct
   106  // iterator positions of a mergingIterLevel.
   107  //
   108  // The merging iterator maintains a heap (min during forward iteration, max
   109  // during reverse iteration) containing the boundKeys. Each boundKey is a
   110  // 3-tuple holding the bound user key, whether the bound is a start or end key
   111  // and the set of keys from that level that have that bound. The heap orders
   112  // based on the boundKey's user key only.
   113  //
   114  // The merging iterator is responsible for merging spans across levels to
   115  // determine which span is next, but it's also responsible for fragmenting
   116  // overlapping spans. Consider the example:
   117  //
   118  //	       i0:     b---d e-----h
   119  //	       i1:   a---c         h-----k
   120  //	       i2:   a------------------------------p
   121  //
   122  //	fragments:   a-b-c-d-e-----h-----k----------p
   123  //
   124  // None of the individual child iterators contain a span with the exact bounds
   125  // [c,d), but the merging iterator must produce a span [c,d). To accomplish
   126  // this, the merging iterator visits every span between unique boundary user
   127  // keys. In the above example, this is:
   128  //
   129  //	[a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p)
   130  //
   131  // The merging iterator first initializes the heap to prepare for iteration.
   132  // The description below discusses the mechanics of forward iteration after a
   133  // call to First, but the mechanics are similar for reverse iteration and
   134  // other positioning methods.
   135  //
   136  // During a call to First, the heap is initialized by seeking every
   137  // mergingIterLevel to the first bound of the first fragment. In the above
   138  // example, this seeks the child iterators to:
   139  //
   140  //	i0: (b, boundKindFragmentStart, [ [b,d) ])
   141  //	i1: (a, boundKindFragmentStart, [ [a,c) ])
   142  //	i2: (a, boundKindFragmentStart, [ [a,p) ])
   143  //
   144  // After fixing up the heap, the root of the heap is a boundKey with the
   145  // smallest user key ('a' in the example). Once the heap is setup for iteration
   146  // in the appropriate direction and location, the merging iterator uses
   147  // find{Next,Prev}FragmentSet to find the next/previous span bounds.
   148  //
   149  // During forward iteration, the root of the heap's user key is the start key
   150  // of the next merged span. findNextFragmentSet sets m.start to this user
   151  // key. The heap may contain other boundKeys with the same user key if another
   152  // level has a fragment starting or ending at the same key, so the
   153  // findNextFragmentSet method pulls from the heap until it finds the first key
   154  // greater than m.start. This key is used as the end key.
   155  //
   156  // In the above example, this results in m.start = 'a', m.end = 'b' and child
   157  // iterators in the following positions:
   158  //
   159  //	i0: (b, boundKindFragmentStart, [ [b,d) ])
   160  //	i1: (c, boundKindFragmentEnd,   [ [a,c) ])
   161  //	i2: (p, boundKindFragmentEnd,   [ [a,p) ])
   162  //
   163  // With the user key bounds of the next merged span established,
   164  // findNextFragmentSet must determine which, if any, fragments overlap the span.
   165  // During forward iteration any child iterator that is now positioned at an end
   166  // boundary has an overlapping span. (Justification: The child iterator's end
   167  // boundary is ≥ m.end. The corresponding start boundary must be ≤ m.start since
   168  // there were no other user keys between m.start and m.end. So the fragments
   169  // associated with the iterator's current end boundary have start and end bounds
   170  // such that start ≤ m.start < m.end ≤ end).
   171  //
   172  // findNextFragmentSet iterates over the levels, collecting keys from any child
   173  // iterators positioned at end boundaries. In the above example, i1 and i2 are
   174  // positioned at end boundaries, so findNextFragmentSet collects the keys of
   175  // [a,c) and [a,p). These spans contain the merging iterator's [m.start, m.end)
   176  // span, but they may also extend beyond the m.start and m.end. The merging
   177  // iterator returns the keys with the merging iter's m.start and m.end bounds,
   178  // preserving the underlying keys' sequence numbers, key kinds and values.
   179  //
   180  // A MergingIter is configured with a Transform that's applied to the span
   181  // before surfacing it to the iterator user. A Transform may remove keys
   182  // arbitrarily, but it may not modify the values themselves.
   183  //
   184  // It may be the case that findNextFragmentSet finds no levels positioned at end
   185  // boundaries, or that there are no spans remaining after applying a transform,
   186  // in which case the span [m.start, m.end) overlaps with nothing. In this case
   187  // findNextFragmentSet loops, repeating the above process again until it finds a
   188  // span that does contain keys.
   189  //
   190  // # Memory safety
   191  //
   192  // The FragmentIterator interface only guarantees stability of a Span and its
   193  // associated slices until the next positioning method is called. Adjacent Spans
   194  // may be contained in different sstables, requiring the FragmentIterator
   195  // implementation to close one sstable, releasing its memory, before opening the
   196  // next. Most of the state used by the MergingIter is derived from spans at
   197  // current child iterator positions only, ensuring state is stable. The one
   198  // exception is the start bound during forward iteration and the end bound
   199  // during reverse iteration.
   200  //
   201  // If the heap root originates from an end boundary when findNextFragmentSet
   202  // begins, a Next on the heap root level may invalidate the end boundary. To
   203  // accommodate this, find{Next,Prev}FragmentSet copy the initial boundary if the
   204  // subsequent Next/Prev would move to the next span.
type MergingIter struct {
	// levels holds one entry per child iterator, in the order the iterators
	// were passed to Init (followed by any added through AddLevel).
	levels []mergingIterLevel
	// heap orders the levels' current bound keys; it is a min-heap during
	// forward iteration and a max-heap during reverse iteration.
	heap mergingIterHeap
	// start and end hold the bounds for the span currently under the
	// iterator position.
	//
	// Invariant: None of the levels' iterators contain spans with a bound
	// between start and end. For all bounds b, b ≤ start || b ≥ end.
	start, end []byte
	// buf is a buffer used to save [start, end) boundary keys.
	buf []byte
	// keys holds all of the keys across all levels that overlap the key span
	// [start, end), sorted by Trailer descending. This slice is reconstituted
	// in synthesizeKeys from each mergingIterLevel's keys every time the
	// [start, end) bounds change.
	//
	// Each element points into a child iterator's memory, so the keys may not
	// be directly modified.
	keys keysBySeqNumKind
	// transformer defines a transformation to be applied to a span before it's
	// yielded to the user. Transforming may filter individual keys contained
	// within the span.
	transformer Transformer
	// span holds the iterator's current span. This span is used as the
	// destination for transforms. Every transformed span overwrites the
	// previous.
	span Span

	// err holds the first error recorded while building the heap from the
	// child iterators or while closing them. Once it is set, Next and Prev
	// return nil without moving.
	err error
	// dir records the current iteration direction: +1 while the heap is a
	// min-heap (forward iteration) and -1 while it is a max-heap (reverse
	// iteration). It is zero after Init until the first positioning call.
	dir int8

	// alloc preallocates mergingIterLevel and mergingIterItems for use by the
	// merging iterator. As long as the merging iterator is used with
	// manifest.NumLevels+3 and fewer fragment iterators, the merging iterator
	// will not need to allocate upon initialization. The value NumLevels+3
	// mirrors the preallocated levels in iterAlloc used for point iterators.
	// Invariant: cap(levels) == cap(items)
	alloc struct {
		levels [manifest.NumLevels + 3]mergingIterLevel
		items  [manifest.NumLevels + 3]mergingIterItem
	}
}
   247  
// MergingIter implements the FragmentIterator interface. This blank
// assignment makes the compiler verify that *MergingIter satisfies it.
var _ FragmentIterator = (*MergingIter)(nil)
   250  
// mergingIterLevel holds the per-child-iterator state of a MergingIter: the
// child iterator itself and the span bound it is currently positioned at.
type mergingIterLevel struct {
	// iter is the child iterator over this level's fragmented spans.
	iter FragmentIterator

	// heapKey holds the current key at this level for use within the heap.
	// Its kind is boundKindInvalid when the level has no current bound.
	heapKey boundKey
}
   257  
   258  func (l *mergingIterLevel) next() {
   259  	if l.heapKey.kind == boundKindFragmentStart {
   260  		l.heapKey = boundKey{
   261  			kind: boundKindFragmentEnd,
   262  			key:  l.heapKey.span.End,
   263  			span: l.heapKey.span,
   264  		}
   265  		return
   266  	}
   267  	if s := l.iter.Next(); s == nil {
   268  		l.heapKey = boundKey{kind: boundKindInvalid}
   269  	} else {
   270  		l.heapKey = boundKey{
   271  			kind: boundKindFragmentStart,
   272  			key:  s.Start,
   273  			span: s,
   274  		}
   275  	}
   276  }
   277  
   278  func (l *mergingIterLevel) prev() {
   279  	if l.heapKey.kind == boundKindFragmentEnd {
   280  		l.heapKey = boundKey{
   281  			kind: boundKindFragmentStart,
   282  			key:  l.heapKey.span.Start,
   283  			span: l.heapKey.span,
   284  		}
   285  		return
   286  	}
   287  	if s := l.iter.Prev(); s == nil {
   288  		l.heapKey = boundKey{kind: boundKindInvalid}
   289  	} else {
   290  		l.heapKey = boundKey{
   291  			kind: boundKindFragmentEnd,
   292  			key:  s.End,
   293  			span: s,
   294  		}
   295  	}
   296  }
   297  
   298  // Init initializes the merging iterator with the provided fragment iterators.
   299  func (m *MergingIter) Init(cmp base.Compare, transformer Transformer, iters ...FragmentIterator) {
   300  	levels, items := m.levels, m.heap.items
   301  
   302  	*m = MergingIter{
   303  		heap:        mergingIterHeap{cmp: cmp},
   304  		transformer: transformer,
   305  	}
   306  
   307  	// Invariant: cap(levels) >= cap(items)
   308  	// Invariant: cap(alloc.levels) == cap(alloc.items)
   309  	if len(iters) <= len(m.alloc.levels) {
   310  		// The slices allocated on the MergingIter struct are large enough.
   311  		m.levels = m.alloc.levels[:len(iters)]
   312  		m.heap.items = m.alloc.items[:0]
   313  	} else if len(iters) <= cap(levels) {
   314  		// The existing heap-allocated slices are large enough, so reuse them.
   315  		m.levels = levels[:len(iters)]
   316  		m.heap.items = items[:0]
   317  	} else {
   318  		// Heap allocate new slices.
   319  		m.levels = make([]mergingIterLevel, len(iters))
   320  		m.heap.items = make([]mergingIterItem, 0, len(iters))
   321  	}
   322  	for i := range m.levels {
   323  		m.levels[i] = mergingIterLevel{iter: iters[i]}
   324  	}
   325  }
   326  
   327  // AddLevel adds a new level to the bottom of the merging iterator. AddLevel
   328  // must be called after Init and before any other method.
   329  func (m *MergingIter) AddLevel(iter FragmentIterator) {
   330  	m.levels = append(m.levels, mergingIterLevel{iter: iter})
   331  }
   332  
   333  // SeekGE moves the iterator to the first span with a start key greater than or
   334  // equal to key.
   335  func (m *MergingIter) SeekGE(key []byte) *Span {
   336  	m.invalidate() // clear state about current position
   337  	for i := range m.levels {
   338  		l := &m.levels[i]
   339  
   340  		// A SeekGE requires we position each level at the smallest bound ≥ key.
   341  		// We must search through both inclusive start and exclusive end bounds.
   342  		// Note that this search requirement differs from FragmentIterator's
   343  		// .SeekGE'semantics, which returns the span with the smallest start key
   344  		// ≥ key. To remedy this difference, we find the last span less than
   345  		// key. If its end boundary is greater than or equal to key, we use it.
   346  		// Otherwise we use the start boundary of the next span which
   347  		// necessarily has a start ≥ key.
   348  		s := l.iter.SeekLT(key)
   349  		if s != nil && m.cmp(s.End, key) >= 0 {
   350  			// s.End ≥ key
   351  			// We need to use this span's end bound.
   352  			l.heapKey = boundKey{
   353  				kind: boundKindFragmentEnd,
   354  				key:  s.End,
   355  				span: s,
   356  			}
   357  			continue
   358  		}
   359  		// s.End < key
   360  		// The span `s` ends before key. Next to the first span with a Start ≥
   361  		// key, and use that.
   362  		if s = l.iter.Next(); s == nil {
   363  			l.heapKey = boundKey{kind: boundKindInvalid}
   364  		} else {
   365  			l.heapKey = boundKey{
   366  				kind: boundKindFragmentStart,
   367  				key:  s.Start,
   368  				span: s,
   369  			}
   370  		}
   371  	}
   372  	m.initMinHeap()
   373  	return m.findNextFragmentSet()
   374  }
   375  
   376  // SeekLT moves the iterator to the last span with a start key less than key.
   377  func (m *MergingIter) SeekLT(key []byte) *Span {
   378  	// TODO(jackson): Evaluate whether there's an implementation of SeekLT
   379  	// independent of SeekGE that is more efficient. It's tricky, because the
   380  	// span we should return might straddle `key` itself.
   381  	//
   382  	// Consider the scenario:
   383  	//       a----------l      #2
   384  	//         b-----------m   #1
   385  	//
   386  	// The merged, fully-fragmented spans that MergingIter exposes to the caller
   387  	// have bounds:
   388  	//        a-b              #2
   389  	//          b--------l     #2
   390  	//          b--------l     #1
   391  	//                   l-m   #1
   392  	//
   393  	// A call SeekLT(c) must return the largest of the above spans with a
   394  	// Start user key < key: [b,l)#1. This requires examining bounds both < 'c'
   395  	// (the 'b' of [b,m)#1's start key) and bounds ≥ 'c' (the 'l' of ([a,l)#2's
   396  	// end key).
   397  	if s := m.SeekGE(key); s == nil && m.err != nil {
   398  		return nil
   399  	}
   400  	// Prev to the previous span.
   401  	return m.Prev()
   402  }
   403  
   404  // First seeks the iterator to the first span.
   405  func (m *MergingIter) First() *Span {
   406  	m.invalidate() // clear state about current position
   407  	for i := range m.levels {
   408  		if s := m.levels[i].iter.First(); s == nil {
   409  			m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
   410  		} else {
   411  			m.levels[i].heapKey = boundKey{
   412  				kind: boundKindFragmentStart,
   413  				key:  s.Start,
   414  				span: s,
   415  			}
   416  		}
   417  	}
   418  	m.initMinHeap()
   419  	return m.findNextFragmentSet()
   420  }
   421  
   422  // Last seeks the iterator to the last span.
   423  func (m *MergingIter) Last() *Span {
   424  	m.invalidate() // clear state about current position
   425  	for i := range m.levels {
   426  		if s := m.levels[i].iter.Last(); s == nil {
   427  			m.levels[i].heapKey = boundKey{kind: boundKindInvalid}
   428  		} else {
   429  			m.levels[i].heapKey = boundKey{
   430  				kind: boundKindFragmentEnd,
   431  				key:  s.End,
   432  				span: s,
   433  			}
   434  		}
   435  	}
   436  	m.initMaxHeap()
   437  	return m.findPrevFragmentSet()
   438  }
   439  
   440  // Next advances the iterator to the next span.
   441  func (m *MergingIter) Next() *Span {
   442  	if m.err != nil {
   443  		return nil
   444  	}
   445  	if m.dir == +1 && (m.end == nil || m.start == nil) {
   446  		return nil
   447  	}
   448  	if m.dir != +1 {
   449  		m.switchToMinHeap()
   450  	}
   451  	return m.findNextFragmentSet()
   452  }
   453  
   454  // Prev advances the iterator to the previous span.
   455  func (m *MergingIter) Prev() *Span {
   456  	if m.err != nil {
   457  		return nil
   458  	}
   459  	if m.dir == -1 && (m.end == nil || m.start == nil) {
   460  		return nil
   461  	}
   462  	if m.dir != -1 {
   463  		m.switchToMaxHeap()
   464  	}
   465  	return m.findPrevFragmentSet()
   466  }
   467  
   468  // Error returns any accumulated error.
   469  func (m *MergingIter) Error() error {
   470  	if m.heap.len() == 0 || m.err != nil {
   471  		return m.err
   472  	}
   473  	return m.levels[m.heap.items[0].index].iter.Error()
   474  }
   475  
   476  // Close closes the iterator, releasing all acquired resources.
   477  func (m *MergingIter) Close() error {
   478  	for i := range m.levels {
   479  		if err := m.levels[i].iter.Close(); err != nil && m.err == nil {
   480  			m.err = err
   481  		}
   482  	}
   483  	m.levels = nil
   484  	m.heap.items = m.heap.items[:0]
   485  	return m.err
   486  }
   487  
   488  // String implements fmt.Stringer.
   489  func (m *MergingIter) String() string {
   490  	return "merging-keyspan"
   491  }
   492  
   493  func (m *MergingIter) initMinHeap() {
   494  	m.dir = +1
   495  	m.heap.reverse = false
   496  	m.initHeap()
   497  }
   498  
   499  func (m *MergingIter) initMaxHeap() {
   500  	m.dir = -1
   501  	m.heap.reverse = true
   502  	m.initHeap()
   503  }
   504  
   505  func (m *MergingIter) initHeap() {
   506  	m.heap.items = m.heap.items[:0]
   507  	for i := range m.levels {
   508  		if l := &m.levels[i]; l.heapKey.kind != boundKindInvalid {
   509  			m.heap.items = append(m.heap.items, mergingIterItem{
   510  				index:    i,
   511  				boundKey: &l.heapKey,
   512  			})
   513  		} else {
   514  			m.err = firstError(m.err, l.iter.Error())
   515  			if m.err != nil {
   516  				return
   517  			}
   518  		}
   519  	}
   520  	m.heap.init()
   521  }
   522  
   523  func (m *MergingIter) switchToMinHeap() {
   524  	// switchToMinHeap reorients the heap for forward iteration, without moving
   525  	// the current MergingIter position.
   526  
   527  	// The iterator is currently positioned at the span [m.start, m.end),
   528  	// oriented in the reverse direction, so each level's iterator is positioned
   529  	// to the largest key ≤ m.start. To reorient in the forward direction, we
   530  	// must advance each level's iterator to the smallest key ≥ m.end. Consider
   531  	// this three-level example.
   532  	//
   533  	//         i0:     b---d e-----h
   534  	//         i1:   a---c         h-----k
   535  	//         i2:   a------------------------------p
   536  	//
   537  	//     merged:   a-b-c-d-e-----h-----k----------p
   538  	//
   539  	// If currently positioned at the merged span [c,d), then the level
   540  	// iterators' heap keys are:
   541  	//
   542  	//    i0: (b, [b, d))   i1: (c, [a,c))   i2: (a, [a,p))
   543  	//
   544  	// Reversing the heap should not move the merging iterator and should not
   545  	// change the current [m.start, m.end) bounds. It should only prepare for
   546  	// forward iteration by updating the child iterators' heap keys to:
   547  	//
   548  	//    i0: (d, [b, d))   i1: (h, [h,k))   i2: (p, [a,p))
   549  	//
   550  	// In every level the first key ≥ m.end is the next in the iterator.
   551  	// Justification: Suppose not and a level iterator's next key was some key k
   552  	// such that k < m.end. The max-heap invariant dictates that the current
   553  	// iterator position is the largest entry with a user key ≥ m.start. This
   554  	// means k > m.start. We started with the assumption that k < m.end, so
   555  	// m.start < k < m.end. But then k is between our current span bounds,
   556  	// and reverse iteration would have constructed the current interval to be
   557  	// [k, m.end) not [m.start, m.end).
   558  
   559  	if invariants.Enabled {
   560  		for i := range m.levels {
   561  			l := &m.levels[i]
   562  			if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.start) > 0 {
   563  				panic("bitalostable: invariant violation: max-heap key > m.start")
   564  			}
   565  		}
   566  	}
   567  
   568  	for i := range m.levels {
   569  		m.levels[i].next()
   570  	}
   571  	m.initMinHeap()
   572  }
   573  
   574  func (m *MergingIter) switchToMaxHeap() {
   575  	// switchToMaxHeap reorients the heap for reverse iteration, without moving
   576  	// the current MergingIter position.
   577  
   578  	// The iterator is currently positioned at the span [m.start, m.end),
   579  	// oriented in the forward direction. Each level's iterator is positioned at
   580  	// the smallest bound ≥ m.end. To reorient in the reverse direction, we must
   581  	// move each level's iterator to the largest key ≤ m.start. Consider this
   582  	// three-level example.
   583  	//
   584  	//         i0:     b---d e-----h
   585  	//         i1:   a---c         h-----k
   586  	//         i2:   a------------------------------p
   587  	//
   588  	//     merged:   a-b-c-d-e-----h-----k----------p
   589  	//
   590  	// If currently positioned at the merged span [c,d), then the level
   591  	// iterators' heap keys are:
   592  	//
   593  	//    i0: (d, [b, d))   i1: (h, [h,k))   i2: (p, [a,p))
   594  	//
   595  	// Reversing the heap should not move the merging iterator and should not
   596  	// change the current [m.start, m.end) bounds. It should only prepare for
   597  	// reverse iteration by updating the child iterators' heap keys to:
   598  	//
   599  	//    i0: (b, [b, d))   i1: (c, [a,c))   i2: (a, [a,p))
   600  	//
   601  	// In every level the largest key ≤ m.start is the prev in the iterator.
   602  	// Justification: Suppose not and a level iterator's prev key was some key k
   603  	// such that k > m.start. The min-heap invariant dictates that the current
   604  	// iterator position is the smallest entry with a user key ≥ m.end. This
   605  	// means k < m.end, otherwise the iterator would be positioned at k. We
   606  	// started with the assumption that k > m.start, so m.start < k < m.end. But
   607  	// then k is between our current span bounds, and reverse iteration
   608  	// would have constructed the current interval to be [m.start, k) not
   609  	// [m.start, m.end).
   610  
   611  	if invariants.Enabled {
   612  		for i := range m.levels {
   613  			l := &m.levels[i]
   614  			if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.end) < 0 {
   615  				panic("bitalostable: invariant violation: min-heap key < m.end")
   616  			}
   617  		}
   618  	}
   619  
   620  	for i := range m.levels {
   621  		m.levels[i].prev()
   622  	}
   623  	m.initMaxHeap()
   624  }
   625  
   626  func (m *MergingIter) cmp(a, b []byte) int {
   627  	return m.heap.cmp(a, b)
   628  }
   629  
   630  func (m *MergingIter) findNextFragmentSet() *Span {
   631  	// Each iteration of this loop considers a new merged span between unique
   632  	// user keys. An iteration may find that there exists no overlap for a given
   633  	// span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
   634  	// below loop will still consider [b,d) before continuing to [d, e)). It
   635  	// returns when it finds a span that is covered by at least one key.
   636  
   637  	for m.heap.len() > 0 && m.err == nil {
   638  		// Initialize the next span's start bound. SeekGE and First prepare the
   639  		// heap without advancing. Next leaves the heap in a state such that the
   640  		// root is the smallest bound key equal to the returned span's end key,
   641  		// so the heap is already positioned at the next merged span's start key.
   642  
   643  		// NB: m.heapRoot() might be either an end boundary OR a start boundary
   644  		// of a level's span. Both end and start boundaries may still be a start
   645  		// key of a span in the set of fragmented spans returned by MergingIter.
   646  		// Consider the scenario:
   647  		//       a----------l      #1
   648  		//         b-----------m   #2
   649  		//
   650  		// The merged, fully-fragmented spans that MergingIter exposes to the caller
   651  		// have bounds:
   652  		//        a-b              #1
   653  		//          b--------l     #1
   654  		//          b--------l     #2
   655  		//                   l-m   #2
   656  		//
   657  		// When advancing to l-m#2, we must set m.start to 'l', which originated
   658  		// from [a,l)#1's end boundary.
   659  		m.start = m.heap.items[0].boundKey.key
   660  
   661  		// Before calling nextEntry, consider whether it might invalidate our
   662  		// start boundary. If the start boundary key originated from an end
   663  		// boundary, then we need to copy the start key before advancing the
   664  		// underlying iterator to the next Span.
   665  		if m.heap.items[0].boundKey.kind == boundKindFragmentEnd {
   666  			m.buf = append(m.buf[:0], m.start...)
   667  			m.start = m.buf
   668  		}
   669  
   670  		// There may be many entries all with the same user key. Spans in other
   671  		// levels may also start or end at this same user key. For eg:
   672  		// L1:   [a, c) [c, d)
   673  		// L2:          [c, e)
   674  		// If we're positioned at L1's end(c) end boundary, we want to advance
   675  		// to the first bound > c.
   676  		m.nextEntry()
   677  		for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.start) == 0 {
   678  			m.nextEntry()
   679  		}
   680  		if len(m.heap.items) == 0 || m.err != nil {
   681  			break
   682  		}
   683  
   684  		// The current entry at the top of the heap is the first key > m.start.
   685  		// It must become the end bound for the span we will return to the user.
   686  		// In the above example, the root of the heap is L1's end(d).
   687  		m.end = m.heap.items[0].boundKey.key
   688  
   689  		// Each level within m.levels may have a span that overlaps the
   690  		// fragmented key span [m.start, m.end). Update m.keys to point to them
   691  		// and sort them by kind, sequence number. There may not be any keys
   692  		// defined over [m.start, m.end) if we're between the end of one span
   693  		// and the start of the next, OR if the configured transform filters any
   694  		// keys out. We allow empty spans that were emitted by child iterators, but
   695  		// we elide empty spans created by the mergingIter itself that don't overlap
   696  		// with any child iterator returned spans (i.e. empty spans that bridge two
   697  		// distinct child-iterator-defined spans).
   698  		if found, s := m.synthesizeKeys(+1); found && s != nil {
   699  			return s
   700  		}
   701  	}
   702  	// Exhausted.
   703  	m.clear()
   704  	return nil
   705  }
   706  
   707  func (m *MergingIter) findPrevFragmentSet() *Span {
   708  	// Each iteration of this loop considers a new merged span between unique
   709  	// user keys. An iteration may find that there exists no overlap for a given
   710  	// span, (eg, if the spans [a,b), [d, e) exist within level iterators, the
   711  	// below loop will still consider [b,d) before continuing to [a, b)). It
   712  	// returns when it finds a span that is covered by at least one key.
   713  
   714  	for m.heap.len() > 0 && m.err == nil {
   715  		// Initialize the next span's end bound. SeekLT and Last prepare the
   716  		// heap without advancing. Prev leaves the heap in a state such that the
   717  		// root is the largest bound key equal to the returned span's start key,
   718  		// so the heap is already positioned at the next merged span's end key.
   719  
   720  		// NB: m.heapRoot() might be either an end boundary OR a start boundary
   721  		// of a level's span. Both end and start boundaries may still be a start
   722  		// key of a span returned by MergingIter. Consider the scenario:
   723  		//       a----------l      #2
   724  		//         b-----------m   #1
   725  		//
   726  		// The merged, fully-fragmented spans that MergingIter exposes to the caller
   727  		// have bounds:
   728  		//        a-b              #2
   729  		//          b--------l     #2
   730  		//          b--------l     #1
   731  		//                   l-m   #1
   732  		//
   733  		// When Preving to a-b#2, we must set m.end to 'b', which originated
   734  		// from [b,m)#1's start boundary.
   735  		m.end = m.heap.items[0].boundKey.key
   736  
   737  		// Before calling prevEntry, consider whether it might invalidate our
   738  		// end boundary. If the end boundary key originated from a start
   739  		// boundary, then we need to copy the end key before advancing the
   740  		// underlying iterator to the previous Span.
   741  		if m.heap.items[0].boundKey.kind == boundKindFragmentStart {
   742  			m.buf = append(m.buf[:0], m.end...)
   743  			m.end = m.buf
   744  		}
   745  
   746  		// There may be many entries all with the same user key. Spans in other
   747  		// levels may also start or end at this same user key. For eg:
   748  		// L1:   [a, c) [c, d)
   749  		// L2:          [c, e)
   750  		// If we're positioned at L1's start(c) start boundary, we want to prev
   751  		// to move to the first bound < c.
   752  		m.prevEntry()
   753  		for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.end) == 0 {
   754  			m.prevEntry()
   755  		}
   756  		if len(m.heap.items) == 0 || m.err != nil {
   757  			break
   758  		}
   759  
   760  		// The current entry at the top of the heap is the first key < m.end.
   761  		// It must become the start bound for the span we will return to the
   762  		// user. In the above example, the root of the heap is L1's start(a).
   763  		m.start = m.heap.items[0].boundKey.key
   764  
   765  		// Each level within m.levels may have a set of keys that overlap the
   766  		// fragmented key span [m.start, m.end). Update m.keys to point to them
   767  		// and sort them by kind, sequence number. There may not be any keys
   768  		// spanning [m.start, m.end) if we're between the end of one span and
   769  		// the start of the next, OR if the configured transform filters any
   770  		// keys out.  We allow empty spans that were emitted by child iterators, but
   771  		// we elide empty spans created by the mergingIter itself that don't overlap
   772  		// with any child iterator returned spans (i.e. empty spans that bridge two
   773  		// distinct child-iterator-defined spans).
   774  		if found, s := m.synthesizeKeys(-1); found && s != nil {
   775  			return s
   776  		}
   777  	}
   778  	// Exhausted.
   779  	m.clear()
   780  	return nil
   781  }
   782  
   783  func (m *MergingIter) heapRoot() []byte {
   784  	return m.heap.items[0].boundKey.key
   785  }
   786  
   787  // synthesizeKeys is called by find{Next,Prev}FragmentSet to populate and
   788  // sort the set of keys overlapping [m.start, m.end).
   789  //
   790  // During forward iteration, if the current heap item is a fragment end,
   791  // then the fragment's start must be ≤ m.start and the fragment overlaps the
   792  // current iterator position of [m.start, m.end).
   793  //
   794  // During reverse iteration, if the current heap item is a fragment start,
   795  // then the fragment's end must be ≥ m.end and the fragment overlaps the
   796  // current iteration position of [m.start, m.end).
   797  //
   798  // The boolean return value, `found`, is true if the returned span overlaps
   799  // with a span returned by a child iterator.
   800  func (m *MergingIter) synthesizeKeys(dir int8) (bool, *Span) {
   801  	if invariants.Enabled {
   802  		if m.cmp(m.start, m.end) >= 0 {
   803  			panic(fmt.Sprintf("bitalostable: invariant violation: span start ≥ end: %s >= %s", m.start, m.end))
   804  		}
   805  	}
   806  
   807  	m.keys = m.keys[:0]
   808  	found := false
   809  	for i := range m.levels {
   810  		if dir == +1 && m.levels[i].heapKey.kind == boundKindFragmentEnd ||
   811  			dir == -1 && m.levels[i].heapKey.kind == boundKindFragmentStart {
   812  			m.keys = append(m.keys, m.levels[i].heapKey.span.Keys...)
   813  			found = true
   814  		}
   815  	}
   816  	// TODO(jackson): We should be able to remove this sort and instead
   817  	// guarantee that we'll return keys in the order of the levels they're from.
   818  	// With careful iterator construction, this would  guarantee that they're
   819  	// sorted by trailer descending for the range key iteration use case.
   820  	sort.Sort(&m.keys)
   821  
   822  	// Apply the configured transform. See visibleTransform.
   823  	s := Span{
   824  		Start:     m.start,
   825  		End:       m.end,
   826  		Keys:      m.keys,
   827  		KeysOrder: ByTrailerDesc,
   828  	}
   829  	if err := m.transformer.Transform(m.cmp, s, &m.span); err != nil {
   830  		m.err = err
   831  		return false, nil
   832  	}
   833  	return found, &m.span
   834  }
   835  
   836  func (m *MergingIter) invalidate() {
   837  	m.err = nil
   838  }
   839  
   840  func (m *MergingIter) clear() {
   841  	for fi := range m.keys {
   842  		m.keys[fi] = Key{}
   843  	}
   844  	m.keys = m.keys[:0]
   845  }
   846  
   847  // nextEntry steps to the next entry.
   848  func (m *MergingIter) nextEntry() {
   849  	l := &m.levels[m.heap.items[0].index]
   850  	l.next()
   851  	if !l.heapKey.valid() {
   852  		// l.iter is exhausted.
   853  		m.err = l.iter.Error()
   854  		if m.err == nil {
   855  			m.heap.pop()
   856  		}
   857  		return
   858  	}
   859  
   860  	if m.heap.len() > 1 {
   861  		m.heap.fix(0)
   862  	}
   863  }
   864  
   865  // prevEntry steps to the previous entry.
   866  func (m *MergingIter) prevEntry() {
   867  	l := &m.levels[m.heap.items[0].index]
   868  	l.prev()
   869  	if !l.heapKey.valid() {
   870  		// l.iter is exhausted.
   871  		m.err = l.iter.Error()
   872  		if m.err == nil {
   873  			m.heap.pop()
   874  		}
   875  		return
   876  	}
   877  
   878  	if m.heap.len() > 1 {
   879  		m.heap.fix(0)
   880  	}
   881  }
   882  
   883  // DebugString returns a string representing the current internal state of the
   884  // merging iterator and its heap for debugging purposes.
   885  func (m *MergingIter) DebugString() string {
   886  	var buf bytes.Buffer
   887  	fmt.Fprintf(&buf, "Current bounds: [%q, %q)\n", m.start, m.end)
   888  	for i := range m.levels {
   889  		fmt.Fprintf(&buf, "%d: heap key %s\n", i, m.levels[i].heapKey)
   890  	}
   891  	return buf.String()
   892  }
   893  
   894  type mergingIterItem struct {
   895  	// boundKey points to the corresponding mergingIterLevel's `iterKey`.
   896  	*boundKey
   897  	// index is the index of this level within the MergingIter's levels field.
   898  	index int
   899  }
   900  
   901  // mergingIterHeap is copied from mergingIterHeap defined in the root bitalostable
   902  // package for use with point keys.
   903  
   904  type mergingIterHeap struct {
   905  	cmp     base.Compare
   906  	reverse bool
   907  	items   []mergingIterItem
   908  }
   909  
   910  func (h *mergingIterHeap) len() int {
   911  	return len(h.items)
   912  }
   913  
   914  func (h *mergingIterHeap) less(i, j int) bool {
   915  	// This key comparison only uses the user key and not the boundKind. Bound
   916  	// kind doesn't matter because when stepping over a user key,
   917  	// findNextFragmentSet and findPrevFragmentSet skip past all heap items with
   918  	// that user key, and makes no assumptions on ordering. All other heap
   919  	// examinations only consider the user key.
   920  	ik, jk := h.items[i].key, h.items[j].key
   921  	c := h.cmp(ik, jk)
   922  	if h.reverse {
   923  		return c > 0
   924  	}
   925  	return c < 0
   926  }
   927  
   928  func (h *mergingIterHeap) swap(i, j int) {
   929  	h.items[i], h.items[j] = h.items[j], h.items[i]
   930  }
   931  
   932  // init, fix, up and down are copied from the go stdlib.
   933  func (h *mergingIterHeap) init() {
   934  	// heapify
   935  	n := h.len()
   936  	for i := n/2 - 1; i >= 0; i-- {
   937  		h.down(i, n)
   938  	}
   939  }
   940  
   941  func (h *mergingIterHeap) fix(i int) {
   942  	if !h.down(i, h.len()) {
   943  		h.up(i)
   944  	}
   945  }
   946  
   947  func (h *mergingIterHeap) pop() *mergingIterItem {
   948  	n := h.len() - 1
   949  	h.swap(0, n)
   950  	h.down(0, n)
   951  	item := &h.items[n]
   952  	h.items = h.items[:n]
   953  	return item
   954  }
   955  
   956  func (h *mergingIterHeap) up(j int) {
   957  	for {
   958  		i := (j - 1) / 2 // parent
   959  		if i == j || !h.less(j, i) {
   960  			break
   961  		}
   962  		h.swap(i, j)
   963  		j = i
   964  	}
   965  }
   966  
   967  func (h *mergingIterHeap) down(i0, n int) bool {
   968  	i := i0
   969  	for {
   970  		j1 := 2*i + 1
   971  		if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
   972  			break
   973  		}
   974  		j := j1 // left child
   975  		if j2 := j1 + 1; j2 < n && h.less(j2, j1) {
   976  			j = j2 // = 2*i + 2  // right child
   977  		}
   978  		if !h.less(j, i) {
   979  			break
   980  		}
   981  		h.swap(i, j)
   982  		i = j
   983  	}
   984  	return i > i0
   985  }
   986  
   987  type boundKind int8
   988  
   989  const (
   990  	boundKindInvalid boundKind = iota
   991  	boundKindFragmentStart
   992  	boundKindFragmentEnd
   993  )
   994  
   995  type boundKey struct {
   996  	kind boundKind
   997  	key  []byte
   998  	// span holds the span the bound key comes from.
   999  	//
  1000  	// If kind is boundKindFragmentStart, then key is span.Start. If kind is
  1001  	// boundKindFragmentEnd, then key is span.End.
  1002  	span *Span
  1003  }
  1004  
  1005  func (k boundKey) valid() bool {
  1006  	return k.kind != boundKindInvalid
  1007  }
  1008  
  1009  func (k boundKey) String() string {
  1010  	var buf bytes.Buffer
  1011  	switch k.kind {
  1012  	case boundKindInvalid:
  1013  		fmt.Fprint(&buf, "invalid")
  1014  	case boundKindFragmentStart:
  1015  		fmt.Fprint(&buf, "fragment-start")
  1016  	case boundKindFragmentEnd:
  1017  		fmt.Fprint(&buf, "fragment-end  ")
  1018  	default:
  1019  		fmt.Fprintf(&buf, "unknown-kind(%d)", k.kind)
  1020  	}
  1021  	fmt.Fprintf(&buf, " %s [", k.key)
  1022  	fmt.Fprintf(&buf, "%s", k.span)
  1023  	fmt.Fprint(&buf, "]")
  1024  	return buf.String()
  1025  }