github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/merging_iter.go (about) 1 // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package keyspan 6 7 import ( 8 "bytes" 9 "fmt" 10 "sort" 11 12 "github.com/zuoyebang/bitalostable/internal/base" 13 "github.com/zuoyebang/bitalostable/internal/invariants" 14 "github.com/zuoyebang/bitalostable/internal/manifest" 15 ) 16 17 // TODO(jackson): Consider implementing an optimization to seek lower levels 18 // past higher levels' RANGEKEYDELs. This would be analaogous to the 19 // optimization bitalostable.mergingIter performs for RANGEDELs during point key 20 // seeks. It may not be worth it, because range keys are rare and cascading 21 // seeks would require introducing key comparisons to switchTo{Min,Max}Heap 22 // where there currently are none. 23 24 // Transformer defines a transformation to be applied to a Span. 25 type Transformer interface { 26 // Transform takes a Span as input and writes the transformed Span to the 27 // provided output *Span pointer. The output Span's Keys slice may be reused 28 // by Transform to reduce allocations. 29 Transform(cmp base.Compare, in Span, out *Span) error 30 } 31 32 // The TransformerFunc type is an adapter to allow the use of ordinary functions 33 // as Transformers. If f is a function with the appropriate signature, 34 // TransformerFunc(f) is a Transformer that calls f. 35 type TransformerFunc func(base.Compare, Span, *Span) error 36 37 // Transform calls f(cmp, in, out). 38 func (tf TransformerFunc) Transform(cmp base.Compare, in Span, out *Span) error { 39 return tf(cmp, in, out) 40 } 41 42 var noopTransform Transformer = TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { 43 dst.Start, dst.End = s.Start, s.End 44 dst.Keys = append(dst.Keys[:0], s.Keys...) 45 return nil 46 }) 47 48 // visibleTransform filters keys that are invisible at the provided snapshot 49 // sequence number. 50 func visibleTransform(snapshot uint64) Transformer { 51 return TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { 52 dst.Start, dst.End = s.Start, s.End 53 dst.Keys = dst.Keys[:0] 54 for _, k := range s.Keys { 55 if base.Visible(k.SeqNum(), snapshot) { 56 dst.Keys = append(dst.Keys, k) 57 } 58 } 59 return nil 60 }) 61 } 62 63 // MergingIter merges spans across levels of the LSM, exposing an iterator over 64 // spans that yields sets of spans fragmented at unique user key boundaries. 65 // 66 // A MergingIter is initialized with an arbitrary number of child iterators over 67 // fragmented spans. Each child iterator exposes fragmented key spans, such that 68 // overlapping keys are surfaced in a single Span. Key spans from one child 69 // iterator may overlap key spans from another child iterator arbitrarily. 70 // 71 // The spans combined by MergingIter will return spans with keys sorted by 72 // trailer descending. If the MergingIter is configured with a Transformer, it's 73 // permitted to modify the ordering of the spans' keys returned by MergingIter. 74 // 75 // # Algorithm 76 // 77 // The merging iterator wraps child iterators, merging and fragmenting spans 78 // across levels. The high-level algorithm is: 79 // 80 // 1. Initialize the heap with bound keys from child iterators' spans. 81 // 2. Find the next [or previous] two unique user keys' from bounds. 82 // 3. Consider the span formed between the two unique user keys a candidate 83 // span. 84 // 4. Determine if any of the child iterators' spans overlap the candidate 85 // span. 86 // 4a. If any of the child iterator's current bounds are end keys 87 // (during forward iteration) or start keys (during reverse 88 // iteration), then all the spans with that bound overlap the 89 // candidate span. 90 // 4b. Apply the configured transform, which may remove keys. 91 // 4b. If no spans overlap, forget the smallest (forward iteration) 92 // or largest (reverse iteration) unique user key and advance 93 // the iterators to the next unique user key. Start again from 3. 94 // 95 // # Detailed algorithm 96 // 97 // Each level (i0, i1, ...) has a user-provided input FragmentIterator. The 98 // merging iterator steps through individual boundaries of the underlying 99 // spans separately. If the underlying FragmentIterator has fragments 100 // [a,b){#2,#1} [b,c){#1} the mergingIterLevel.{next,prev} step through: 101 // 102 // (a, start), (b, end), (b, start), (c, end) 103 // 104 // Note that (a, start) and (b, end) are observed ONCE each, despite two keys 105 // sharing those bounds. Also note that (b, end) and (b, start) are two distinct 106 // iterator positions of a mergingIterLevel. 107 // 108 // The merging iterator maintains a heap (min during forward iteration, max 109 // during reverse iteration) containing the boundKeys. Each boundKey is a 110 // 3-tuple holding the bound user key, whether the bound is a start or end key 111 // and the set of keys from that level that have that bound. The heap orders 112 // based on the boundKey's user key only. 113 // 114 // The merging iterator is responsible for merging spans across levels to 115 // determine which span is next, but it's also responsible for fragmenting 116 // overlapping spans. Consider the example: 117 // 118 // i0: b---d e-----h 119 // i1: a---c h-----k 120 // i2: a------------------------------p 121 // 122 // fragments: a-b-c-d-e-----h-----k----------p 123 // 124 // None of the individual child iterators contain a span with the exact bounds 125 // [c,d), but the merging iterator must produce a span [c,d). To accomplish 126 // this, the merging iterator visits every span between unique boundary user 127 // keys. In the above example, this is: 128 // 129 // [a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p) 130 // 131 // The merging iterator first initializes the heap to prepare for iteration. 132 // The description below discusses the mechanics of forward iteration after a 133 // call to First, but the mechanics are similar for reverse iteration and 134 // other positioning methods. 135 // 136 // During a call to First, the heap is initialized by seeking every 137 // mergingIterLevel to the first bound of the first fragment. In the above 138 // example, this seeks the child iterators to: 139 // 140 // i0: (b, boundKindFragmentStart, [ [b,d) ]) 141 // i1: (a, boundKindFragmentStart, [ [a,c) ]) 142 // i2: (a, boundKindFragmentStart, [ [a,p) ]) 143 // 144 // After fixing up the heap, the root of the heap is a boundKey with the 145 // smallest user key ('a' in the example). Once the heap is setup for iteration 146 // in the appropriate direction and location, the merging iterator uses 147 // find{Next,Prev}FragmentSet to find the next/previous span bounds. 148 // 149 // During forward iteration, the root of the heap's user key is the start key 150 // key of next merged span. findNextFragmentSet sets m.start to this user 151 // key. The heap may contain other boundKeys with the same user key if another 152 // level has a fragment starting or ending at the same key, so the 153 // findNextFragmentSet method pulls from the heap until it finds the first key 154 // greater than m.start. This key is used as the end key. 155 // 156 // In the above example, this results in m.start = 'a', m.end = 'b' and child 157 // iterators in the following positions: 158 // 159 // i0: (b, boundKindFragmentStart, [ [b,d) ]) 160 // i1: (c, boundKindFragmentEnd, [ [a,c) ]) 161 // i2: (p, boundKindFragmentEnd, [ [a,p) ]) 162 // 163 // With the user key bounds of the next merged span established, 164 // findNextFragmentSet must determine which, if any, fragments overlap the span. 165 // During forward iteration any child iterator that is now positioned at an end 166 // boundary has an overlapping span. (Justification: The child iterator's end 167 // boundary is ≥ m.end. The corresponding start boundary must be ≤ m.start since 168 // there were no other user keys between m.start and m.end. So the fragments 169 // associated with the iterator's current end boundary have start and end bounds 170 // such that start ≤ m.start < m.end ≤ end). 171 // 172 // findNextFragmentSet iterates over the levels, collecting keys from any child 173 // iterators positioned at end boundaries. In the above example, i1 and i2 are 174 // positioned at end boundaries, so findNextFragmentSet collects the keys of 175 // [a,c) and [a,p). These spans contain the merging iterator's [m.start, m.end) 176 // span, but they may also extend beyond the m.start and m.end. The merging 177 // iterator returns the keys with the merging iter's m.start and m.end bounds, 178 // preserving the underlying keys' sequence numbers, key kinds and values. 179 // 180 // A MergingIter is configured with a Transform that's applied to the span 181 // before surfacing it to the iterator user. A Transform may remove keys 182 // arbitrarily, but it may not modify the values themselves. 183 // 184 // It may be the case that findNextFragmentSet finds no levels positioned at end 185 // boundaries, or that there are no spans remaining after applying a transform, 186 // in which case the span [m.start, m.end) overlaps with nothing. In this case 187 // findNextFragmentSet loops, repeating the above process again until it finds a 188 // span that does contain keys. 189 // 190 // # Memory safety 191 // 192 // The FragmentIterator interface only guarantees stability of a Span and its 193 // associated slices until the next positioning method is called. Adjacent Spans 194 // may be contained in different sstables, requring the FragmentIterator 195 // implementation to close one sstable, releasing its memory, before opening the 196 // next. Most of the state used by the MergingIter is derived from spans at 197 // current child iterator positions only, ensuring state is stable. The one 198 // exception is the start bound during forward iteration and the end bound 199 // during reverse iteration. 200 // 201 // If the heap root originates from an end boundary when findNextFragmentSet 202 // begins, a Next on the heap root level may invalidate the end boundary. To 203 // accommodate this, find{Next,Prev}FragmentSet copy the initial boundary if the 204 // subsequent Next/Prev would move to the next span. 205 type MergingIter struct { 206 levels []mergingIterLevel 207 heap mergingIterHeap 208 // start and end hold the bounds for the span currently under the 209 // iterator position. 210 // 211 // Invariant: None of the levels' iterators contain spans with a bound 212 // between start and end. For all bounds b, b ≤ start || b ≥ end. 213 start, end []byte 214 // buf is a buffer used to save [start, end) boundary keys. 215 buf []byte 216 // keys holds all of the keys across all levels that overlap the key span 217 // [start, end), sorted by Trailer descending. This slice is reconstituted 218 // in synthesizeKeys from each mergingIterLevel's keys every time the 219 // [start, end) bounds change. 220 // 221 // Each element points into a child iterator's memory, so the keys may not 222 // be directly modified. 223 keys keysBySeqNumKind 224 // transformer defines a transformation to be applied to a span before it's 225 // yielded to the user. Transforming may filter individual keys contained 226 // within the span. 227 transformer Transformer 228 // span holds the iterator's current span. This span is used as the 229 // destination for transforms. Every tranformed span overwrites the 230 // previous. 231 span Span 232 233 err error 234 dir int8 235 236 // alloc preallocates mergingIterLevel and mergingIterItems for use by the 237 // merging iterator. As long as the merging iterator is used with 238 // manifest.NumLevels+3 and fewer fragment iterators, the merging iterator 239 // will not need to allocate upon initialization. The value NumLevels+3 240 // mirrors the preallocated levels in iterAlloc used for point iterators. 241 // Invariant: cap(levels) == cap(items) 242 alloc struct { 243 levels [manifest.NumLevels + 3]mergingIterLevel 244 items [manifest.NumLevels + 3]mergingIterItem 245 } 246 } 247 248 // MergingIter implements the FragmentIterator interface. 249 var _ FragmentIterator = (*MergingIter)(nil) 250 251 type mergingIterLevel struct { 252 iter FragmentIterator 253 254 // heapKey holds the current key at this level for use within the heap. 255 heapKey boundKey 256 } 257 258 func (l *mergingIterLevel) next() { 259 if l.heapKey.kind == boundKindFragmentStart { 260 l.heapKey = boundKey{ 261 kind: boundKindFragmentEnd, 262 key: l.heapKey.span.End, 263 span: l.heapKey.span, 264 } 265 return 266 } 267 if s := l.iter.Next(); s == nil { 268 l.heapKey = boundKey{kind: boundKindInvalid} 269 } else { 270 l.heapKey = boundKey{ 271 kind: boundKindFragmentStart, 272 key: s.Start, 273 span: s, 274 } 275 } 276 } 277 278 func (l *mergingIterLevel) prev() { 279 if l.heapKey.kind == boundKindFragmentEnd { 280 l.heapKey = boundKey{ 281 kind: boundKindFragmentStart, 282 key: l.heapKey.span.Start, 283 span: l.heapKey.span, 284 } 285 return 286 } 287 if s := l.iter.Prev(); s == nil { 288 l.heapKey = boundKey{kind: boundKindInvalid} 289 } else { 290 l.heapKey = boundKey{ 291 kind: boundKindFragmentEnd, 292 key: s.End, 293 span: s, 294 } 295 } 296 } 297 298 // Init initializes the merging iterator with the provided fragment iterators. 299 func (m *MergingIter) Init(cmp base.Compare, transformer Transformer, iters ...FragmentIterator) { 300 levels, items := m.levels, m.heap.items 301 302 *m = MergingIter{ 303 heap: mergingIterHeap{cmp: cmp}, 304 transformer: transformer, 305 } 306 307 // Invariant: cap(levels) >= cap(items) 308 // Invariant: cap(alloc.levels) == cap(alloc.items) 309 if len(iters) <= len(m.alloc.levels) { 310 // The slices allocated on the MergingIter struct are large enough. 311 m.levels = m.alloc.levels[:len(iters)] 312 m.heap.items = m.alloc.items[:0] 313 } else if len(iters) <= cap(levels) { 314 // The existing heap-allocated slices are large enough, so reuse them. 315 m.levels = levels[:len(iters)] 316 m.heap.items = items[:0] 317 } else { 318 // Heap allocate new slices. 319 m.levels = make([]mergingIterLevel, len(iters)) 320 m.heap.items = make([]mergingIterItem, 0, len(iters)) 321 } 322 for i := range m.levels { 323 m.levels[i] = mergingIterLevel{iter: iters[i]} 324 } 325 } 326 327 // AddLevel adds a new level to the bottom of the merging iterator. AddLevel 328 // must be called after Init and before any other method. 329 func (m *MergingIter) AddLevel(iter FragmentIterator) { 330 m.levels = append(m.levels, mergingIterLevel{iter: iter}) 331 } 332 333 // SeekGE moves the iterator to the first span with a start key greater than or 334 // equal to key. 335 func (m *MergingIter) SeekGE(key []byte) *Span { 336 m.invalidate() // clear state about current position 337 for i := range m.levels { 338 l := &m.levels[i] 339 340 // A SeekGE requires we position each level at the smallest bound ≥ key. 341 // We must search through both inclusive start and exclusive end bounds. 342 // Note that this search requirement differs from FragmentIterator's 343 // .SeekGE'semantics, which returns the span with the smallest start key 344 // ≥ key. To remedy this difference, we find the last span less than 345 // key. If its end boundary is greater than or equal to key, we use it. 346 // Otherwise we use the start boundary of the next span which 347 // necessarily has a start ≥ key. 348 s := l.iter.SeekLT(key) 349 if s != nil && m.cmp(s.End, key) >= 0 { 350 // s.End ≥ key 351 // We need to use this span's end bound. 352 l.heapKey = boundKey{ 353 kind: boundKindFragmentEnd, 354 key: s.End, 355 span: s, 356 } 357 continue 358 } 359 // s.End < key 360 // The span `s` ends before key. Next to the first span with a Start ≥ 361 // key, and use that. 362 if s = l.iter.Next(); s == nil { 363 l.heapKey = boundKey{kind: boundKindInvalid} 364 } else { 365 l.heapKey = boundKey{ 366 kind: boundKindFragmentStart, 367 key: s.Start, 368 span: s, 369 } 370 } 371 } 372 m.initMinHeap() 373 return m.findNextFragmentSet() 374 } 375 376 // SeekLT moves the iterator to the last span with a start key less than key. 377 func (m *MergingIter) SeekLT(key []byte) *Span { 378 // TODO(jackson): Evaluate whether there's an implementation of SeekLT 379 // independent of SeekGE that is more efficient. It's tricky, because the 380 // span we should return might straddle `key` itself. 381 // 382 // Consider the scenario: 383 // a----------l #2 384 // b-----------m #1 385 // 386 // The merged, fully-fragmented spans that MergingIter exposes to the caller 387 // have bounds: 388 // a-b #2 389 // b--------l #2 390 // b--------l #1 391 // l-m #1 392 // 393 // A call SeekLT(c) must return the largest of the above spans with a 394 // Start user key < key: [b,l)#1. This requires examining bounds both < 'c' 395 // (the 'b' of [b,m)#1's start key) and bounds ≥ 'c' (the 'l' of ([a,l)#2's 396 // end key). 397 if s := m.SeekGE(key); s == nil && m.err != nil { 398 return nil 399 } 400 // Prev to the previous span. 401 return m.Prev() 402 } 403 404 // First seeks the iterator to the first span. 405 func (m *MergingIter) First() *Span { 406 m.invalidate() // clear state about current position 407 for i := range m.levels { 408 if s := m.levels[i].iter.First(); s == nil { 409 m.levels[i].heapKey = boundKey{kind: boundKindInvalid} 410 } else { 411 m.levels[i].heapKey = boundKey{ 412 kind: boundKindFragmentStart, 413 key: s.Start, 414 span: s, 415 } 416 } 417 } 418 m.initMinHeap() 419 return m.findNextFragmentSet() 420 } 421 422 // Last seeks the iterator to the last span. 423 func (m *MergingIter) Last() *Span { 424 m.invalidate() // clear state about current position 425 for i := range m.levels { 426 if s := m.levels[i].iter.Last(); s == nil { 427 m.levels[i].heapKey = boundKey{kind: boundKindInvalid} 428 } else { 429 m.levels[i].heapKey = boundKey{ 430 kind: boundKindFragmentEnd, 431 key: s.End, 432 span: s, 433 } 434 } 435 } 436 m.initMaxHeap() 437 return m.findPrevFragmentSet() 438 } 439 440 // Next advances the iterator to the next span. 441 func (m *MergingIter) Next() *Span { 442 if m.err != nil { 443 return nil 444 } 445 if m.dir == +1 && (m.end == nil || m.start == nil) { 446 return nil 447 } 448 if m.dir != +1 { 449 m.switchToMinHeap() 450 } 451 return m.findNextFragmentSet() 452 } 453 454 // Prev advances the iterator to the previous span. 455 func (m *MergingIter) Prev() *Span { 456 if m.err != nil { 457 return nil 458 } 459 if m.dir == -1 && (m.end == nil || m.start == nil) { 460 return nil 461 } 462 if m.dir != -1 { 463 m.switchToMaxHeap() 464 } 465 return m.findPrevFragmentSet() 466 } 467 468 // Error returns any accumulated error. 469 func (m *MergingIter) Error() error { 470 if m.heap.len() == 0 || m.err != nil { 471 return m.err 472 } 473 return m.levels[m.heap.items[0].index].iter.Error() 474 } 475 476 // Close closes the iterator, releasing all acquired resources. 477 func (m *MergingIter) Close() error { 478 for i := range m.levels { 479 if err := m.levels[i].iter.Close(); err != nil && m.err == nil { 480 m.err = err 481 } 482 } 483 m.levels = nil 484 m.heap.items = m.heap.items[:0] 485 return m.err 486 } 487 488 // String implements fmt.Stringer. 489 func (m *MergingIter) String() string { 490 return "merging-keyspan" 491 } 492 493 func (m *MergingIter) initMinHeap() { 494 m.dir = +1 495 m.heap.reverse = false 496 m.initHeap() 497 } 498 499 func (m *MergingIter) initMaxHeap() { 500 m.dir = -1 501 m.heap.reverse = true 502 m.initHeap() 503 } 504 505 func (m *MergingIter) initHeap() { 506 m.heap.items = m.heap.items[:0] 507 for i := range m.levels { 508 if l := &m.levels[i]; l.heapKey.kind != boundKindInvalid { 509 m.heap.items = append(m.heap.items, mergingIterItem{ 510 index: i, 511 boundKey: &l.heapKey, 512 }) 513 } else { 514 m.err = firstError(m.err, l.iter.Error()) 515 if m.err != nil { 516 return 517 } 518 } 519 } 520 m.heap.init() 521 } 522 523 func (m *MergingIter) switchToMinHeap() { 524 // switchToMinHeap reorients the heap for forward iteration, without moving 525 // the current MergingIter position. 526 527 // The iterator is currently positioned at the span [m.start, m.end), 528 // oriented in the reverse direction, so each level's iterator is positioned 529 // to the largest key ≤ m.start. To reorient in the forward direction, we 530 // must advance each level's iterator to the smallest key ≥ m.end. Consider 531 // this three-level example. 532 // 533 // i0: b---d e-----h 534 // i1: a---c h-----k 535 // i2: a------------------------------p 536 // 537 // merged: a-b-c-d-e-----h-----k----------p 538 // 539 // If currently positioned at the merged span [c,d), then the level 540 // iterators' heap keys are: 541 // 542 // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p)) 543 // 544 // Reversing the heap should not move the merging iterator and should not 545 // change the current [m.start, m.end) bounds. It should only prepare for 546 // forward iteration by updating the child iterators' heap keys to: 547 // 548 // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p)) 549 // 550 // In every level the first key ≥ m.end is the next in the iterator. 551 // Justification: Suppose not and a level iterator's next key was some key k 552 // such that k < m.end. The max-heap invariant dictates that the current 553 // iterator position is the largest entry with a user key ≥ m.start. This 554 // means k > m.start. We started with the assumption that k < m.end, so 555 // m.start < k < m.end. But then k is between our current span bounds, 556 // and reverse iteration would have constructed the current interval to be 557 // [k, m.end) not [m.start, m.end). 558 559 if invariants.Enabled { 560 for i := range m.levels { 561 l := &m.levels[i] 562 if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.start) > 0 { 563 panic("bitalostable: invariant violation: max-heap key > m.start") 564 } 565 } 566 } 567 568 for i := range m.levels { 569 m.levels[i].next() 570 } 571 m.initMinHeap() 572 } 573 574 func (m *MergingIter) switchToMaxHeap() { 575 // switchToMaxHeap reorients the heap for reverse iteration, without moving 576 // the current MergingIter position. 577 578 // The iterator is currently positioned at the span [m.start, m.end), 579 // oriented in the forward direction. Each level's iterator is positioned at 580 // the smallest bound ≥ m.end. To reorient in the reverse direction, we must 581 // move each level's iterator to the largest key ≤ m.start. Consider this 582 // three-level example. 583 // 584 // i0: b---d e-----h 585 // i1: a---c h-----k 586 // i2: a------------------------------p 587 // 588 // merged: a-b-c-d-e-----h-----k----------p 589 // 590 // If currently positioned at the merged span [c,d), then the level 591 // iterators' heap keys are: 592 // 593 // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p)) 594 // 595 // Reversing the heap should not move the merging iterator and should not 596 // change the current [m.start, m.end) bounds. It should only prepare for 597 // reverse iteration by updating the child iterators' heap keys to: 598 // 599 // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p)) 600 // 601 // In every level the largest key ≤ m.start is the prev in the iterator. 602 // Justification: Suppose not and a level iterator's prev key was some key k 603 // such that k > m.start. The min-heap invariant dictates that the current 604 // iterator position is the smallest entry with a user key ≥ m.end. This 605 // means k < m.end, otherwise the iterator would be positioned at k. We 606 // started with the assumption that k > m.start, so m.start < k < m.end. But 607 // then k is between our current span bounds, and reverse iteration 608 // would have constructed the current interval to be [m.start, k) not 609 // [m.start, m.end). 610 611 if invariants.Enabled { 612 for i := range m.levels { 613 l := &m.levels[i] 614 if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.end) < 0 { 615 panic("bitalostable: invariant violation: min-heap key < m.end") 616 } 617 } 618 } 619 620 for i := range m.levels { 621 m.levels[i].prev() 622 } 623 m.initMaxHeap() 624 } 625 626 func (m *MergingIter) cmp(a, b []byte) int { 627 return m.heap.cmp(a, b) 628 } 629 630 func (m *MergingIter) findNextFragmentSet() *Span { 631 // Each iteration of this loop considers a new merged span between unique 632 // user keys. An iteration may find that there exists no overlap for a given 633 // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the 634 // below loop will still consider [b,d) before continuing to [d, e)). It 635 // returns when it finds a span that is covered by at least one key. 636 637 for m.heap.len() > 0 && m.err == nil { 638 // Initialize the next span's start bound. SeekGE and First prepare the 639 // heap without advancing. Next leaves the heap in a state such that the 640 // root is the smallest bound key equal to the returned span's end key, 641 // so the heap is already positioned at the next merged span's start key. 642 643 // NB: m.heapRoot() might be either an end boundary OR a start boundary 644 // of a level's span. Both end and start boundaries may still be a start 645 // key of a span in the set of fragmented spans returned by MergingIter. 646 // Consider the scenario: 647 // a----------l #1 648 // b-----------m #2 649 // 650 // The merged, fully-fragmented spans that MergingIter exposes to the caller 651 // have bounds: 652 // a-b #1 653 // b--------l #1 654 // b--------l #2 655 // l-m #2 656 // 657 // When advancing to l-m#2, we must set m.start to 'l', which originated 658 // from [a,l)#1's end boundary. 659 m.start = m.heap.items[0].boundKey.key 660 661 // Before calling nextEntry, consider whether it might invalidate our 662 // start boundary. If the start boundary key originated from an end 663 // boundary, then we need to copy the start key before advancing the 664 // underlying iterator to the next Span. 665 if m.heap.items[0].boundKey.kind == boundKindFragmentEnd { 666 m.buf = append(m.buf[:0], m.start...) 667 m.start = m.buf 668 } 669 670 // There may be many entries all with the same user key. Spans in other 671 // levels may also start or end at this same user key. For eg: 672 // L1: [a, c) [c, d) 673 // L2: [c, e) 674 // If we're positioned at L1's end(c) end boundary, we want to advance 675 // to the first bound > c. 676 m.nextEntry() 677 for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.start) == 0 { 678 m.nextEntry() 679 } 680 if len(m.heap.items) == 0 || m.err != nil { 681 break 682 } 683 684 // The current entry at the top of the heap is the first key > m.start. 685 // It must become the end bound for the span we will return to the user. 686 // In the above example, the root of the heap is L1's end(d). 687 m.end = m.heap.items[0].boundKey.key 688 689 // Each level within m.levels may have a span that overlaps the 690 // fragmented key span [m.start, m.end). Update m.keys to point to them 691 // and sort them by kind, sequence number. There may not be any keys 692 // defined over [m.start, m.end) if we're between the end of one span 693 // and the start of the next, OR if the configured transform filters any 694 // keys out. We allow empty spans that were emitted by child iterators, but 695 // we elide empty spans created by the mergingIter itself that don't overlap 696 // with any child iterator returned spans (i.e. empty spans that bridge two 697 // distinct child-iterator-defined spans). 698 if found, s := m.synthesizeKeys(+1); found && s != nil { 699 return s 700 } 701 } 702 // Exhausted. 703 m.clear() 704 return nil 705 } 706 707 func (m *MergingIter) findPrevFragmentSet() *Span { 708 // Each iteration of this loop considers a new merged span between unique 709 // user keys. An iteration may find that there exists no overlap for a given 710 // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the 711 // below loop will still consider [b,d) before continuing to [a, b)). It 712 // returns when it finds a span that is covered by at least one key. 713 714 for m.heap.len() > 0 && m.err == nil { 715 // Initialize the next span's end bound. SeekLT and Last prepare the 716 // heap without advancing. Prev leaves the heap in a state such that the 717 // root is the largest bound key equal to the returned span's start key, 718 // so the heap is already positioned at the next merged span's end key. 719 720 // NB: m.heapRoot() might be either an end boundary OR a start boundary 721 // of a level's span. Both end and start boundaries may still be a start 722 // key of a span returned by MergingIter. Consider the scenario: 723 // a----------l #2 724 // b-----------m #1 725 // 726 // The merged, fully-fragmented spans that MergingIter exposes to the caller 727 // have bounds: 728 // a-b #2 729 // b--------l #2 730 // b--------l #1 731 // l-m #1 732 // 733 // When Preving to a-b#2, we must set m.end to 'b', which originated 734 // from [b,m)#1's start boundary. 735 m.end = m.heap.items[0].boundKey.key 736 737 // Before calling prevEntry, consider whether it might invalidate our 738 // end boundary. If the end boundary key originated from a start 739 // boundary, then we need to copy the end key before advancing the 740 // underlying iterator to the previous Span. 741 if m.heap.items[0].boundKey.kind == boundKindFragmentStart { 742 m.buf = append(m.buf[:0], m.end...) 743 m.end = m.buf 744 } 745 746 // There may be many entries all with the same user key. Spans in other 747 // levels may also start or end at this same user key. For eg: 748 // L1: [a, c) [c, d) 749 // L2: [c, e) 750 // If we're positioned at L1's start(c) start boundary, we want to prev 751 // to move to the first bound < c. 752 m.prevEntry() 753 for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.end) == 0 { 754 m.prevEntry() 755 } 756 if len(m.heap.items) == 0 || m.err != nil { 757 break 758 } 759 760 // The current entry at the top of the heap is the first key < m.end. 761 // It must become the start bound for the span we will return to the 762 // user. In the above example, the root of the heap is L1's start(a). 763 m.start = m.heap.items[0].boundKey.key 764 765 // Each level within m.levels may have a set of keys that overlap the 766 // fragmented key span [m.start, m.end). Update m.keys to point to them 767 // and sort them by kind, sequence number. There may not be any keys 768 // spanning [m.start, m.end) if we're between the end of one span and 769 // the start of the next, OR if the configured transform filters any 770 // keys out. We allow empty spans that were emitted by child iterators, but 771 // we elide empty spans created by the mergingIter itself that don't overlap 772 // with any child iterator returned spans (i.e. empty spans that bridge two 773 // distinct child-iterator-defined spans). 774 if found, s := m.synthesizeKeys(-1); found && s != nil { 775 return s 776 } 777 } 778 // Exhausted. 779 m.clear() 780 return nil 781 } 782 783 func (m *MergingIter) heapRoot() []byte { 784 return m.heap.items[0].boundKey.key 785 } 786 787 // synthesizeKeys is called by find{Next,Prev}FragmentSet to populate and 788 // sort the set of keys overlapping [m.start, m.end). 789 // 790 // During forward iteration, if the current heap item is a fragment end, 791 // then the fragment's start must be ≤ m.start and the fragment overlaps the 792 // current iterator position of [m.start, m.end). 793 // 794 // During reverse iteration, if the current heap item is a fragment start, 795 // then the fragment's end must be ≥ m.end and the fragment overlaps the 796 // current iteration position of [m.start, m.end). 797 // 798 // The boolean return value, `found`, is true if the returned span overlaps 799 // with a span returned by a child iterator. 800 func (m *MergingIter) synthesizeKeys(dir int8) (bool, *Span) { 801 if invariants.Enabled { 802 if m.cmp(m.start, m.end) >= 0 { 803 panic(fmt.Sprintf("bitalostable: invariant violation: span start ≥ end: %s >= %s", m.start, m.end)) 804 } 805 } 806 807 m.keys = m.keys[:0] 808 found := false 809 for i := range m.levels { 810 if dir == +1 && m.levels[i].heapKey.kind == boundKindFragmentEnd || 811 dir == -1 && m.levels[i].heapKey.kind == boundKindFragmentStart { 812 m.keys = append(m.keys, m.levels[i].heapKey.span.Keys...) 813 found = true 814 } 815 } 816 // TODO(jackson): We should be able to remove this sort and instead 817 // guarantee that we'll return keys in the order of the levels they're from. 818 // With careful iterator construction, this would guarantee that they're 819 // sorted by trailer descending for the range key iteration use case. 820 sort.Sort(&m.keys) 821 822 // Apply the configured transform. See visibleTransform. 823 s := Span{ 824 Start: m.start, 825 End: m.end, 826 Keys: m.keys, 827 KeysOrder: ByTrailerDesc, 828 } 829 if err := m.transformer.Transform(m.cmp, s, &m.span); err != nil { 830 m.err = err 831 return false, nil 832 } 833 return found, &m.span 834 } 835 836 func (m *MergingIter) invalidate() { 837 m.err = nil 838 } 839 840 func (m *MergingIter) clear() { 841 for fi := range m.keys { 842 m.keys[fi] = Key{} 843 } 844 m.keys = m.keys[:0] 845 } 846 847 // nextEntry steps to the next entry. 848 func (m *MergingIter) nextEntry() { 849 l := &m.levels[m.heap.items[0].index] 850 l.next() 851 if !l.heapKey.valid() { 852 // l.iter is exhausted. 853 m.err = l.iter.Error() 854 if m.err == nil { 855 m.heap.pop() 856 } 857 return 858 } 859 860 if m.heap.len() > 1 { 861 m.heap.fix(0) 862 } 863 } 864 865 // prevEntry steps to the previous entry. 866 func (m *MergingIter) prevEntry() { 867 l := &m.levels[m.heap.items[0].index] 868 l.prev() 869 if !l.heapKey.valid() { 870 // l.iter is exhausted. 871 m.err = l.iter.Error() 872 if m.err == nil { 873 m.heap.pop() 874 } 875 return 876 } 877 878 if m.heap.len() > 1 { 879 m.heap.fix(0) 880 } 881 } 882 883 // DebugString returns a string representing the current internal state of the 884 // merging iterator and its heap for debugging purposes. 885 func (m *MergingIter) DebugString() string { 886 var buf bytes.Buffer 887 fmt.Fprintf(&buf, "Current bounds: [%q, %q)\n", m.start, m.end) 888 for i := range m.levels { 889 fmt.Fprintf(&buf, "%d: heap key %s\n", i, m.levels[i].heapKey) 890 } 891 return buf.String() 892 } 893 894 type mergingIterItem struct { 895 // boundKey points to the corresponding mergingIterLevel's `iterKey`. 896 *boundKey 897 // index is the index of this level within the MergingIter's levels field. 898 index int 899 } 900 901 // mergingIterHeap is copied from mergingIterHeap defined in the root bitalostable 902 // package for use with point keys. 903 904 type mergingIterHeap struct { 905 cmp base.Compare 906 reverse bool 907 items []mergingIterItem 908 } 909 910 func (h *mergingIterHeap) len() int { 911 return len(h.items) 912 } 913 914 func (h *mergingIterHeap) less(i, j int) bool { 915 // This key comparison only uses the user key and not the boundKind. Bound 916 // kind doesn't matter because when stepping over a user key, 917 // findNextFragmentSet and findPrevFragmentSet skip past all heap items with 918 // that user key, and makes no assumptions on ordering. All other heap 919 // examinations only consider the user key. 920 ik, jk := h.items[i].key, h.items[j].key 921 c := h.cmp(ik, jk) 922 if h.reverse { 923 return c > 0 924 } 925 return c < 0 926 } 927 928 func (h *mergingIterHeap) swap(i, j int) { 929 h.items[i], h.items[j] = h.items[j], h.items[i] 930 } 931 932 // init, fix, up and down are copied from the go stdlib. 933 func (h *mergingIterHeap) init() { 934 // heapify 935 n := h.len() 936 for i := n/2 - 1; i >= 0; i-- { 937 h.down(i, n) 938 } 939 } 940 941 func (h *mergingIterHeap) fix(i int) { 942 if !h.down(i, h.len()) { 943 h.up(i) 944 } 945 } 946 947 func (h *mergingIterHeap) pop() *mergingIterItem { 948 n := h.len() - 1 949 h.swap(0, n) 950 h.down(0, n) 951 item := &h.items[n] 952 h.items = h.items[:n] 953 return item 954 } 955 956 func (h *mergingIterHeap) up(j int) { 957 for { 958 i := (j - 1) / 2 // parent 959 if i == j || !h.less(j, i) { 960 break 961 } 962 h.swap(i, j) 963 j = i 964 } 965 } 966 967 func (h *mergingIterHeap) down(i0, n int) bool { 968 i := i0 969 for { 970 j1 := 2*i + 1 971 if j1 >= n || j1 < 0 { // j1 < 0 after int overflow 972 break 973 } 974 j := j1 // left child 975 if j2 := j1 + 1; j2 < n && h.less(j2, j1) { 976 j = j2 // = 2*i + 2 // right child 977 } 978 if !h.less(j, i) { 979 break 980 } 981 h.swap(i, j) 982 i = j 983 } 984 return i > i0 985 } 986 987 type boundKind int8 988 989 const ( 990 boundKindInvalid boundKind = iota 991 boundKindFragmentStart 992 boundKindFragmentEnd 993 ) 994 995 type boundKey struct { 996 kind boundKind 997 key []byte 998 // span holds the span the bound key comes from. 999 // 1000 // If kind is boundKindFragmentStart, then key is span.Start. If kind is 1001 // boundKindFragmentEnd, then key is span.End. 1002 span *Span 1003 } 1004 1005 func (k boundKey) valid() bool { 1006 return k.kind != boundKindInvalid 1007 } 1008 1009 func (k boundKey) String() string { 1010 var buf bytes.Buffer 1011 switch k.kind { 1012 case boundKindInvalid: 1013 fmt.Fprint(&buf, "invalid") 1014 case boundKindFragmentStart: 1015 fmt.Fprint(&buf, "fragment-start") 1016 case boundKindFragmentEnd: 1017 fmt.Fprint(&buf, "fragment-end ") 1018 default: 1019 fmt.Fprintf(&buf, "unknown-kind(%d)", k.kind) 1020 } 1021 fmt.Fprintf(&buf, " %s [", k.key) 1022 fmt.Fprintf(&buf, "%s", k.span) 1023 fmt.Fprint(&buf, "]") 1024 return buf.String() 1025 }