// Copyright 2018 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package bitalostable

import (
	"bytes"
	"fmt"
	"runtime/debug"

	"github.com/cockroachdb/errors"
	"github.com/zuoyebang/bitalostable/internal/base"
	"github.com/zuoyebang/bitalostable/internal/invariants"
	"github.com/zuoyebang/bitalostable/internal/keyspan"
)

type mergingIterLevel struct {
	iter internalIterator
	// rangeDelIter is set to the range-deletion iterator for the level. When
	// configured with a levelIter, this pointer changes as sstable boundaries
	// are crossed. See levelIter.initRangeDel and the Range Deletions comment
	// below.
	rangeDelIter keyspan.FragmentIterator
	// iterKey and iterValue cache the current key and value that iter is
	// pointed at.
	iterKey   *InternalKey
	iterValue []byte

	// levelIterBoundaryContext's fields are set when using levelIter, in order
	// to surface sstable boundary keys and file-level context. See levelIter
	// comment and the Range Deletions comment below.
	levelIterBoundaryContext

	// tombstone caches the tombstone rangeDelIter is currently pointed at. If
	// tombstone is nil, there are no further tombstones within the
	// current sstable in the current iterator direction. The cached tombstone is
	// only valid for the levels in the range [0,heap[0].index]. This avoids
	// positioning tombstones at lower levels which cannot possibly shadow the
	// current key.
	tombstone *keyspan.Span
}

type levelIterBoundaryContext struct {
	// smallestUserKey and largestUserKey are populated with the smallest and
	// largest boundaries of the current file.
	smallestUserKey, largestUserKey []byte
	// isLargestUserKeyRangeDelSentinel is set to true when a file's largest
	// boundary is an exclusive range deletion sentinel. If true, the file does
	// not contain any keys with the provided user key, and the largestUserKey
	// bound is exclusive.
	isLargestUserKeyRangeDelSentinel bool
	// isSyntheticIterBoundsKey is set to true iff the key returned by the level
	// iterator is a synthetic key derived from the iterator bounds. This is
	// used to prevent the mergingIter from being stuck at such a synthetic key
	// if it becomes the top element of the heap.
	isSyntheticIterBoundsKey bool
	// isIgnorableBoundaryKey is set to true iff the key returned by the level
	// iterator is a file boundary key that should be ignored. This is used to
	// keep a levelIter file's range deletion iterator open as long as other
	// levels within the merging iterator require it.
	isIgnorableBoundaryKey bool
}

// mergingIter provides a merged view of multiple iterators from different
// levels of the LSM.
//
// The core of a mergingIter is a heap of internalIterators (see
// mergingIterHeap). The heap can operate as either a min-heap, used during
// forward iteration (First, SeekGE, Next) or a max-heap, used during reverse
// iteration (Last, SeekLT, Prev). The heap is initialized in calls to First,
// Last, SeekGE, and SeekLT. A call to Next or Prev takes the current top
// element on the heap, advances its iterator, and then "fixes" the heap
// property. When one of the child iterators is exhausted during Next/Prev
// iteration, it is removed from the heap.
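//
// As a mental model only (the real implementation uses mergingIterHeap over
// full InternalKeys and supports both iteration directions), the
// pop-smallest/advance cycle described above looks roughly like the following
// sketch, where the child type and helper names are illustrative rather than
// this package's API:
//
//	type child struct {
//		key       string // current key, ascending within each child
//		exhausted bool
//		advance   func() // moves the child to its next key
//	}
//
//	// next returns the smallest current key among the children and advances
//	// the child that produced it. A linear scan stands in for the heap here;
//	// mergingIter does the same work in O(log(number of levels)) per step.
//	func next(children []*child) (string, bool) {
//		best := -1
//		for i, c := range children {
//			if c.exhausted {
//				continue // exhausted children drop out, as they do from the heap
//			}
//			if best == -1 || c.key < children[best].key {
//				best = i
//			}
//		}
//		if best == -1 {
//			return "", false // every child is exhausted
//		}
//		k := children[best].key
//		children[best].advance()
//		return k, true
//	}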
//
// # Range Deletions
//
// A mergingIter can optionally be configured with a slice of range deletion
// iterators. The range deletion iterator slice must exactly parallel the point
// iterators and the range deletion iterator must correspond to the same level
// in the LSM as the point iterator. Note that each memtable and each table in
// L0 is a different "level" from the mergingIter perspective. So level 0 below
// does not correspond to L0 in the LSM.
//
// A range deletion iterator iterates over fragmented range tombstones. Range
// tombstones are fragmented by splitting them at any overlapping points. This
// fragmentation guarantees that within an sstable tombstones will either be
// distinct or will have identical start and end user keys. While range
// tombstones are fragmented within an sstable, the start and end keys are not
// truncated to sstable boundaries. This is necessary because the tombstone end
// key is exclusive and does not have a sequence number. Consider an sstable
// containing the range tombstone [a,c)#9 and the key "b#8". The tombstone must
// delete "b#8", yet older versions of "b" might spill over to the next
// sstable. So the boundary key for this sstable must be "b#8". Adjusting the
// end key of tombstones to be optionally inclusive or contain a sequence
// number would be possible solutions (such solutions have potentially serious
// issues: tombstones have exclusive end keys since an inclusive deletion end
// can be converted to an exclusive one while the reverse transformation is not
// possible; the semantics of a sequence number for the end key of a range
// tombstone are murky).
//
// The approach taken here performs an implicit truncation of the tombstone to
// the sstable boundaries.
//
// During initialization of a mergingIter, the range deletion iterators for
// batches, memtables, and L0 tables are populated up front. Note that Batches
// and memtables index unfragmented tombstones. Batch.newRangeDelIter() and
// memTable.newRangeDelIter() fragment and cache the tombstones on demand. The
// L1-L6 range deletion iterators are populated by levelIter. When configured
// to load range deletion iterators, whenever a levelIter loads a table it
// loads both the point iterator and the range deletion
// iterator. levelIter.rangeDelIter is configured to point to the right entry
// in mergingIter.levels. The effect of this setup is that
// mergingIter.levels[i].rangeDelIter always contains the fragmented range
// tombstone for the current table in level i that the levelIter has open.
//
// Another crucial mechanism of levelIter is that it materializes fake point
// entries for the table boundaries if the boundary is a range deletion
// key. Consider a table that contains only a range tombstone [a-e)#10. The
// sstable boundaries for this table will be a#10,15 and
// e#72057594037927935,15. During forward iteration levelIter will return
// e#72057594037927935,15 as a key. During reverse iteration levelIter will
// return a#10,15 as a key. These sentinel keys act as bookends to point
// iteration and allow mergingIter to keep a table and its associated range
// tombstones loaded as long as there are keys at lower levels that are within
// the bounds of the table.
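//
// To make the fragmentation guarantee above concrete: if a memtable or
// sstable logically contains the overlapping tombstones [a,g)#9 and [c,e)#7,
// the fragmented form that a range deletion iterator surfaces is
//
//	[a,c)#9  [c,e)#9  [c,e)#7  [e,g)#9
//
// so any two fragments either share identical start and end user keys or do
// not overlap at all.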
//
// The final piece to the range deletion puzzle is the LSM invariant that for a
// given key K newer versions of K can only exist earlier in the level, or at
// higher levels of the tree. For example, if K#4 exists in L3, K#5 can only
// exist earlier in L3 or in L0, L1, L2 or a memtable. Get very explicitly
// uses this invariant to find the value for a key by walking the LSM level by
// level. For range deletions, this invariant means that a range deletion at
// level N will necessarily shadow any keys within its bounds in level Y where
// Y > N. One wrinkle to this statement is that it only applies to keys that
// lie within the sstable bounds as well, but we get that guarantee due to the
// way the range deletion iterator and point iterator are bound together by a
// levelIter.
//
// Tying the above all together, we get a picture where each level (index in
// mergingIter.levels) is composed of both point operations (pX) and range
// deletions (rX). The range deletions for level X shadow both the point
// operations and range deletions for level Y where Y > X allowing mergingIter
// to skip processing entries in that shadow. For example, consider the
// scenario:
//
//	r0: a---e
//	r1: d---h
//	r2: g---k
//	r3: j---n
//	r4: m---q
//
// This is showing 5 levels of range deletions. Consider what happens upon
// SeekGE("b"). We first seek the point iterator for level 0 (the point values
// are not shown above) and we then seek the range deletion iterator. That
// returns the tombstone [a,e). This tombstone tells us that all keys in the
// range [a,e) in lower levels are deleted so we can skip them. So we can
// adjust the seek key to "e", the tombstone end key. For level 1 we seek to
// "e" and find the range tombstone [d,h) and similar logic holds. By the time
// we get to level 4 we're seeking to "n".
//
// One consequence of not truncating tombstone end keys to sstable boundaries
// is that the seeking process described above cannot always seek to the
// tombstone end key in the older level. For example, imagine in the above
// example r3 is a partitioned level (i.e., L1+ in our LSM), and the sstable
// containing [j, n) has "k" as its upper boundary. In this situation,
// compactions involving keys at or after "k" can output those keys to r4+,
// even if they're newer than our tombstone [j, n). So instead of seeking to
// "n" in r4 we can only seek to "k". To achieve this, the instance variable
// `largestUserKey` maintains the upper bounds of the current sstables in the
// partitioned levels. In this example, `levels[3].largestUserKey` holds "k",
// telling us to limit the seek triggered by a tombstone in r3 to "k".
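//
// In code form, the per-level seek-key adjustment sketched above is
// essentially the following fragment, simplified from seekGE later in this
// file (the identifiers mirror that function's locals and level fields):
//
//	// If the seek key lands inside a tombstone visible at the snapshot, the
//	// next older level only needs to be searched from the tombstone's end,
//	// clamped to this level's largest user key for partitioned levels.
//	if tombstone != nil && tombstone.Contains(cmp, key) {
//		if largestUserKey != nil && cmp(largestUserKey, tombstone.End) < 0 {
//			key = largestUserKey // implicit truncation to the file boundary
//		} else {
//			key = tombstone.End
//		}
//	}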
//
// During actual iteration levels can contain both point operations and range
// deletions. Within a level, when a range deletion contains a point operation
// the sequence numbers must be checked to determine if the point operation is
// newer or older than the range deletion tombstone. The mergingIter maintains
// the invariant that the range deletion iterators for all levels newer than
// the current iteration key (L < m.heap.items[0].index) are positioned at the
// next (or previous during reverse iteration) range deletion tombstone. We
// know those levels don't contain a range deletion tombstone that covers the
// current key because if they did the current key would be deleted. The range
// deletion iterator for the current key's level is positioned at a range
// tombstone covering or past the current key. The position of all of the other
// range deletion iterators is unspecified. Whenever a key from those levels
// becomes the current key, their range deletion iterators need to be
// positioned. This lazy positioning avoids seeking the range deletion
// iterators for keys that are never considered. (A similar bit of lazy
// evaluation can be done for the point iterators, but is still TBD).
//
// For a full example, consider the following setup:
//
//	p0: o
//	r0: m---q
//
//	p1: n p
//	r1: g---k
//
//	p2: b d i
//	r2: a---e q----v
//
//	p3: e
//	r3:
//
// If we start iterating from the beginning, the first key we encounter is "b"
// in p2. When the mergingIter is pointing at a valid entry, the range deletion
// iterators for all of the levels < m.heap.items[0].index are positioned at
// the next range tombstone past the current key. So r0 will point at [m,q) and
// r1 at [g,k). When the key "b" is encountered, we check to see if the current
// tombstone for r0 or r1 contains it, and whether the tombstone for r2, [a,e),
// contains and is newer than "b".
//
// Advancing the iterator finds the next key at "d". This is in the same level
// as the previous key "b" so we don't have to reposition any of the range
// deletion iterators, but merely check whether "d" is now contained by any of
// the range tombstones at higher levels or has stepped past the range
// tombstone in its own level or higher levels. In this case, there is nothing
// to be done.
//
// Advancing the iterator again finds "e". Since "e" comes from p3, we have to
// position the r3 range deletion iterator, which is empty. "e" is past the r2
// tombstone of [a,e) so we need to advance the r2 range deletion iterator to
// [q,v).
//
// The next key is "i". Because this key is in p2, a level above "e", we don't
// have to reposition any range deletion iterators and instead see that "i" is
// covered by the range tombstone [g,k). The iterator is immediately advanced
// to "n" which is covered by the range tombstone [m,q) causing the iterator to
// advance to "o" which is visible.
//
// TODO(peter,rangedel): For testing, advance the iterator through various
// scenarios and have each step display the current state (i.e. the current
// heap and range-del iterator positioning).
type mergingIter struct {
	logger   Logger
	split    Split
	dir      int
	snapshot uint64
	levels   []mergingIterLevel
	heap     mergingIterHeap
	err      error
	prefix   []byte
	lower    []byte
	upper    []byte
	stats    *InternalIteratorStats

	combinedIterState *combinedIterState

	// Elide range tombstones from being returned during iteration. Set to true
	// when mergingIter is a child of Iterator and the mergingIter is processing
	// range tombstones.
	elideRangeTombstones bool
}

// mergingIter implements the base.InternalIterator interface.
var _ base.InternalIterator = (*mergingIter)(nil)

// newMergingIter returns an iterator that merges its input. Walking the
// resultant iterator will return all key/value pairs of all input iterators
// in strictly increasing key order, as defined by cmp. It is permissible to
// pass a nil split parameter if the caller is never going to call
// SeekPrefixGE.
//
// The input's key ranges may overlap, but there are assumed to be no duplicate
// keys: if iters[i] contains a key k then iters[j] will not contain that key k.
//
// None of the iters may be nil.
func newMergingIter(
	logger Logger,
	stats *base.InternalIteratorStats,
	cmp Compare,
	split Split,
	iters ...internalIterator,
) *mergingIter {
	m := &mergingIter{}
	levels := make([]mergingIterLevel, len(iters))
	for i := range levels {
		levels[i].iter = iters[i]
	}
	m.init(&IterOptions{logger: logger}, stats, cmp, split, levels...)
	return m
}

func (m *mergingIter) init(
	opts *IterOptions,
	stats *base.InternalIteratorStats,
	cmp Compare,
	split Split,
	levels ...mergingIterLevel,
) {
	m.err = nil
	m.logger = opts.getLogger()
	if opts != nil {
		m.lower = opts.LowerBound
		m.upper = opts.UpperBound
	}
	m.snapshot = InternalKeySeqNumMax
	m.levels = levels
	m.heap.cmp = cmp
	m.split = split
	m.stats = stats
	if cap(m.heap.items) < len(levels) {
		m.heap.items = make([]mergingIterItem, 0, len(levels))
	} else {
		m.heap.items = m.heap.items[:0]
	}
}

func (m *mergingIter) initHeap() {
	m.heap.items = m.heap.items[:0]
	for i := range m.levels {
		if l := &m.levels[i]; l.iterKey != nil {
			m.heap.items = append(m.heap.items, mergingIterItem{
				index: i,
				key:   *l.iterKey,
				value: l.iterValue,
			})
		} else {
			m.err = firstError(m.err, l.iter.Error())
			if m.err != nil {
				return
			}
		}
	}
	m.heap.init()
}

func (m *mergingIter) initMinHeap() {
	m.dir = 1
	m.heap.reverse = false
	m.initHeap()
	m.initMinRangeDelIters(-1)
}

// The level of the previous top element was oldTopLevel. Note that all range delete
// iterators < oldTopLevel are positioned past the key of the previous top element and
// the range delete iterator == oldTopLevel is positioned at or past the key of the
// previous top element. We need to position the range delete iterators from oldTopLevel + 1
// to the level of the current top element.
func (m *mergingIter) initMinRangeDelIters(oldTopLevel int) {
	if m.heap.len() == 0 {
		return
	}

	// Position the range-del iterators at levels <= m.heap.items[0].index.
	item := &m.heap.items[0]
	for level := oldTopLevel + 1; level <= item.index; level++ {
		l := &m.levels[level]
		if l.rangeDelIter == nil {
			continue
		}
		l.tombstone = keyspan.SeekGE(m.heap.cmp, l.rangeDelIter, item.key.UserKey)
	}
}

func (m *mergingIter) initMaxHeap() {
	m.dir = -1
	m.heap.reverse = true
	m.initHeap()
	m.initMaxRangeDelIters(-1)
}

// The level of the previous top element was oldTopLevel. Note that all range delete
// iterators < oldTopLevel are positioned before the key of the previous top element and
// the range delete iterator == oldTopLevel is positioned at or before the key of the
// previous top element. We need to position the range delete iterators from oldTopLevel + 1
// to the level of the current top element.
func (m *mergingIter) initMaxRangeDelIters(oldTopLevel int) {
	if m.heap.len() == 0 {
		return
	}
	// Position the range-del iterators at levels <= m.heap.items[0].index.
	item := &m.heap.items[0]
	for level := oldTopLevel + 1; level <= item.index; level++ {
		l := &m.levels[level]
		if l.rangeDelIter == nil {
			continue
		}
		l.tombstone = keyspan.SeekLE(m.heap.cmp, l.rangeDelIter, item.key.UserKey)
	}
}

func (m *mergingIter) switchToMinHeap() {
	if m.heap.len() == 0 {
		if m.lower != nil {
			m.SeekGE(m.lower, base.SeekGEFlagsNone)
		} else {
			m.First()
		}
		return
	}

	// We're switching from using a max heap to a min heap. We need to advance
	// any iterator that is less than or equal to the current key. Consider the
	// scenario where we have 2 iterators being merged (user-key:seq-num):
	//
	//	i1: *a:2      b:2
	//	i2:      a:1       b:1
	//
	// The current key is a:2 and i2 is pointed at a:1. When we switch to forward
	// iteration, we want to return a key that is greater than a:2.

	key := m.heap.items[0].key
	cur := &m.levels[m.heap.items[0].index]

	for i := range m.levels {
		l := &m.levels[i]
		if l == cur {
			continue
		}

		// If the iterator is exhausted, it may be out of bounds if range
		// deletions modified our search key as we descended. We need to
		// reposition it within the search bounds. If the current key is a
		// range tombstone, the iterator might still be exhausted but at a
		// sstable boundary sentinel. It would be okay to reposition an
		// iterator like this only through successive Next calls, except that
		// it would violate the levelIter's invariants by causing it to return
		// a key before the lower bound.
		//
		//	bounds = [ f, _ )
		//	L0: [ b ]          [ f*       z ]
		//	L1:   [ a       |----|  k   y ]
		//	L2:     [ c (d) ] [ e   g   m ]
		//	L3:       [                 x ]
		//
		//	* - current key   [] - table bounds   () - heap item
		//
		// In the above diagram, the L2 iterator is positioned at a sstable
		// boundary (d) outside the lower bound (f). It arrived here from a
		// seek whose seek-key was modified by a range tombstone. If we called
		// Next on the L2 iterator, it would return e, violating its lower
		// bound. Instead, we seek it to >= f and Next from there.

		if l.iterKey == nil || (m.lower != nil && l.isSyntheticIterBoundsKey &&
			l.iterKey.IsExclusiveSentinel() &&
			m.heap.cmp(l.iterKey.UserKey, m.lower) <= 0) {
			if m.lower != nil {
				l.iterKey, l.iterValue = l.iter.SeekGE(m.lower, base.SeekGEFlagsNone)
			} else {
				l.iterKey, l.iterValue = l.iter.First()
			}
		}
		for ; l.iterKey != nil; l.iterKey, l.iterValue = l.iter.Next() {
			if base.InternalCompare(m.heap.cmp, key, *l.iterKey) < 0 {
				// key < iter-key
				break
			}
			// key >= iter-key
		}
	}

	// Special handling for the current iterator because we were using its key
	// above. The iterator cur.iter may still be exhausted at a sstable boundary
	// sentinel. Similar to the logic applied to the other levels, in these
	// cases we seek the iterator to the first key in order to avoid violating
	// levelIter's invariants. See the example in the for loop above.
	if m.lower != nil && cur.isSyntheticIterBoundsKey && cur.iterKey.IsExclusiveSentinel() &&
		m.heap.cmp(cur.iterKey.UserKey, m.lower) <= 0 {
		cur.iterKey, cur.iterValue = cur.iter.SeekGE(m.lower, base.SeekGEFlagsNone)
	} else {
		cur.iterKey, cur.iterValue = cur.iter.Next()
	}
	m.initMinHeap()
}

func (m *mergingIter) switchToMaxHeap() {
	if m.heap.len() == 0 {
		if m.upper != nil {
			m.SeekLT(m.upper, base.SeekLTFlagsNone)
		} else {
			m.Last()
		}
		return
	}

	// We're switching from using a min heap to a max heap. We need to backup any
	// iterator that is greater than or equal to the current key. Consider the
	// scenario where we have 2 iterators being merged (user-key:seq-num):
	//
	//	i1: a:2      *b:2
	//	i2:      a:1       b:1
	//
	// The current key is b:2 and i2 is pointing at b:1. When we switch to
	// reverse iteration, we want to return a key that is less than b:2.
	key := m.heap.items[0].key
	cur := &m.levels[m.heap.items[0].index]

	for i := range m.levels {
		l := &m.levels[i]
		if l == cur {
			continue
		}

		// If the iterator is exhausted, it may be out of bounds if range
		// deletions modified our search key as we descended. We need to
		// reposition it within the search bounds. If the current key is a
		// range tombstone, the iterator might still be exhausted but at a
		// sstable boundary sentinel. It would be okay to reposition an
		// iterator like this only through successive Prev calls, except that
		// it would violate the levelIter's invariants by causing it to return
		// a key beyond the upper bound.
		//
		//	bounds = [ _, g )
		//	L0: [ b ]          [ f*      z ]
		//	L1:   [ a |-------|    k   y ]
		//	L2: [ c d ]      h [(i)    m ]
		//	L3: [ e                    x ]
		//
		//	* - current key   [] - table bounds   () - heap item
		//
		// In the above diagram, the L2 iterator is positioned at a sstable
		// boundary (i) outside the upper bound (g). It arrived here from a
		// seek whose seek-key was modified by a range tombstone. If we called
		// Prev on the L2 iterator, it would return h, violating its upper
		// bound. Instead, we seek it to < g, and Prev from there.

		if l.iterKey == nil || (m.upper != nil && l.isSyntheticIterBoundsKey &&
			l.iterKey.IsExclusiveSentinel() && m.heap.cmp(l.iterKey.UserKey, m.upper) >= 0) {
			if m.upper != nil {
				l.iterKey, l.iterValue = l.iter.SeekLT(m.upper, base.SeekLTFlagsNone)
			} else {
				l.iterKey, l.iterValue = l.iter.Last()
			}
		}
		for ; l.iterKey != nil; l.iterKey, l.iterValue = l.iter.Prev() {
			if base.InternalCompare(m.heap.cmp, key, *l.iterKey) > 0 {
				// key > iter-key
				break
			}
			// key <= iter-key
		}
	}

	// Special handling for the current iterator because we were using its key
	// above. The iterator cur.iter may still be exhausted at a sstable boundary
	// sentinel. Similar to the logic applied to the other levels, in these
	// cases we seek the iterator to a key below the upper bound in order to
	// avoid violating levelIter's invariants by Prev-ing through files. See
	// the example in the for loop above.
	if m.upper != nil && cur.isSyntheticIterBoundsKey && cur.iterKey.IsExclusiveSentinel() &&
		m.heap.cmp(cur.iterKey.UserKey, m.upper) >= 0 {
		cur.iterKey, cur.iterValue = cur.iter.SeekLT(m.upper, base.SeekLTFlagsNone)
	} else {
		cur.iterKey, cur.iterValue = cur.iter.Prev()
	}
	m.initMaxHeap()
}
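
// Both of the "is this entry deleted" helpers below hinge on sequence-number
// visibility: a range tombstone hides a point key only if the tombstone is
// itself visible at the iterator's snapshot and is newer than the key. The
// real checks are Span.VisibleAt and Span.CoversAt from internal/keyspan; a
// minimal sketch of the combined predicate, assuming plain sequence numbers
// with no batch or ingestion special cases, is:
//
//	// covers reports whether a tombstone with sequence number tombSeq deletes
//	// a point key with sequence number keySeq when reading at snapshot.
//	func covers(tombSeq, keySeq, snapshot uint64) bool {
//		return tombSeq < snapshot && // tombstone is visible at the snapshot
//			tombSeq > keySeq // tombstone is newer than the point key
//	}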

// Steps to the next entry. item is the current top item in the heap.
func (m *mergingIter) nextEntry(item *mergingIterItem) {
	l := &m.levels[item.index]
	oldTopLevel := item.index
	oldRangeDelIter := l.rangeDelIter
	if l.iterKey, l.iterValue = l.iter.Next(); l.iterKey != nil {
		item.key, item.value = *l.iterKey, l.iterValue
		if m.heap.len() > 1 {
			m.heap.fix(0)
		}
		if l.rangeDelIter != oldRangeDelIter {
			// The rangeDelIter changed which indicates that the l.iter moved to the
			// next sstable. We have to update the tombstone for oldTopLevel as well.
			oldTopLevel--
		}
	} else {
		m.err = l.iter.Error()
		if m.err == nil {
			m.heap.pop()
		}
	}

	// The cached tombstones are only valid for the levels
	// [0,oldTopLevel]. Update the cached tombstones for any levels in the range
	// [oldTopLevel+1,heap[0].index].
	m.initMinRangeDelIters(oldTopLevel)
}

// isNextEntryDeleted() starts from the current entry (as the next entry) and if it is deleted,
// moves the iterators forward as needed and returns true, else it returns false. item is the top
// item in the heap.
func (m *mergingIter) isNextEntryDeleted(item *mergingIterItem) bool {
	// Look for a range deletion tombstone containing item.key at higher
	// levels (level < item.index). If we find such a range tombstone we know
	// it deletes the key in the current level. Also look for a range
	// deletion at the current level (level == item.index). If we find such a
	// range deletion we need to check whether it is newer than the current
	// entry.
	for level := 0; level <= item.index; level++ {
		l := &m.levels[level]
		if l.rangeDelIter == nil || l.tombstone == nil {
			// If l.tombstone is nil, there are no further tombstones
			// in the current sstable in the current (forward) iteration
			// direction.
			continue
		}
		if m.heap.cmp(l.tombstone.End, item.key.UserKey) <= 0 {
			// The current key is at or past the tombstone end key.
			//
			// NB: for the case that this l.rangeDelIter is provided by a levelIter we know that
			// the levelIter must be positioned at a key >= item.key. So it is sufficient to seek the
			// current l.rangeDelIter (since any range del iterators that will be provided by the
			// levelIter in the future cannot contain item.key). Also, it is possible that we
			// will encounter parts of the range delete that should be ignored -- we handle that
			// below.
			l.tombstone = keyspan.SeekGE(m.heap.cmp, l.rangeDelIter, item.key.UserKey)
		}
		if l.tombstone == nil {
			continue
		}

		// Reasoning for correctness of untruncated tombstone handling when the untruncated
		// tombstone is at a higher level:
		// The iterator corresponding to this tombstone is still in the heap so it must be
		// positioned >= item.key. Which means the Largest key bound of the sstable containing this
		// tombstone is >= item.key. So the upper limit of this tombstone cannot be file-bounds-constrained
		// to < item.key. But it is possible that item.key < smallestUserKey, in which
		// case this tombstone should be ignored.
		//
		// Example 1:
		// sstable bounds [c#8, g#12] containing a tombstone [b, i)#7, and key is c#6. The
		// smallestUserKey is c, so we know the key is within the file bounds and the tombstone
		// [b, i) covers it.
		//
		// Example 2:
		// Same sstable bounds but key is b#10. The smallestUserKey is c, so the tombstone [b, i)
		// does not cover this key.
		//
		// For a tombstone at the same level as the key, the file bounds are trivially satisfied.
		if (l.smallestUserKey == nil || m.heap.cmp(l.smallestUserKey, item.key.UserKey) <= 0) &&
			l.tombstone.VisibleAt(m.snapshot) && l.tombstone.Contains(m.heap.cmp, item.key.UserKey) {
			if level < item.index {
				// We could also do m.seekGE(..., level + 1). The levels from
				// [level + 1, item.index) are already after item.key so seeking them may be
				// wasteful.

				// We can seek up to the min of largestUserKey and tombstone.End.
				//
				// Using example 1 above, we can seek to the smaller of g and i, which is g.
				//
				// Another example, where the sstable bounds are [c#8, i#InternalRangeDelSentinel],
				// and the tombstone is [b, i)#8. Seeking to i is correct since it is seeking up to
				// the exclusive bound of the tombstone. We do not need to look at
				// isLargestUserKeyRangeDelSentinel.
				//
				// Progress argument: Since this file is at a higher level than item.key we know
				// that the iterator in this file must be positioned within its bounds and at a key
				// X > item.key (otherwise it would be the min of the heap). It is not
				// possible for X.UserKey == item.key.UserKey, since it is incompatible with
				// X > item.key (a lower version cannot be in a higher sstable), so it must be that
				// X.UserKey > item.key.UserKey. Which means l.largestUserKey > item.key.UserKey.
				// We also know that l.tombstone.End > item.key.UserKey. So the min of these,
				// seekKey, computed below, is > item.key.UserKey, so the call to seekGE() will
				// make forward progress.
				seekKey := l.tombstone.End
				if l.largestUserKey != nil && m.heap.cmp(l.largestUserKey, seekKey) < 0 {
					seekKey = l.largestUserKey
				}
				// This seek is not directly due to a SeekGE call, so we don't
				// know enough about the underlying iterator positions, and so
				// we keep the try-seek-using-next optimization disabled.
				//
				// Additionally, we set the relative-seek flag. This is
				// important when iterating with lazy combined iteration. If
				// there's a range key between this level's current file and the
				// file the seek will land on, we need to detect it in order to
				// trigger construction of the combined iterator.
				m.seekGE(seekKey, item.index, base.SeekGEFlagsNone.EnableRelativeSeek())
				return true
			}
			if l.tombstone.CoversAt(m.snapshot, item.key.SeqNum()) {
				m.nextEntry(item)
				return true
			}
		}
	}
	return false
}

// Starting from the current entry, finds the first (next) entry that can be returned.
func (m *mergingIter) findNextEntry() (*InternalKey, []byte) {
	var reseeked bool
	for m.heap.len() > 0 && m.err == nil {
		item := &m.heap.items[0]
		if m.levels[item.index].isSyntheticIterBoundsKey {
			break
		}
		// For prefix iteration, stop if we already seeked the iterator due to a
		// range tombstone and are now past the prefix. We could amortize the
		// cost of this comparison by doing it only after we have iterated in
		// this for loop a few times. But unless we find a performance benefit
		// to that, we do the simple thing and compare each time. Note that
		// isNextEntryDeleted already did at least 4 key comparisons in order to
		// return true, and additionally at least one heap comparison to step to
		// the next entry.
		//
		// Note that we cannot move this comparison into the isNextEntryDeleted
		// branch. Once isNextEntryDeleted determines a key is deleted and seeks
		// the level's iterator, item.key's memory is potentially invalid. If
		// the iterator is now exhausted, item.key may be garbage.
		if m.prefix != nil && reseeked {
			if n := m.split(item.key.UserKey); !bytes.Equal(m.prefix, item.key.UserKey[:n]) {
				return nil, nil
			}
		}

		m.addItemStats(item)
		if m.isNextEntryDeleted(item) {
			m.stats.PointsCoveredByRangeTombstones++
			reseeked = true
			continue
		}
		if item.key.Visible1(m.snapshot) &&
			(!m.levels[item.index].isIgnorableBoundaryKey) &&
			(item.key.Kind() != InternalKeyKindRangeDelete || !m.elideRangeTombstones) {
			return &item.key, item.value
		}
		m.nextEntry(item)
	}
	return nil, nil
}

// Steps to the prev entry. item is the current top item in the heap.
func (m *mergingIter) prevEntry(item *mergingIterItem) {
	l := &m.levels[item.index]
	oldTopLevel := item.index
	oldRangeDelIter := l.rangeDelIter
	if l.iterKey, l.iterValue = l.iter.Prev(); l.iterKey != nil {
		item.key, item.value = *l.iterKey, l.iterValue
		if m.heap.len() > 1 {
			m.heap.fix(0)
		}
		if l.rangeDelIter != oldRangeDelIter && l.rangeDelIter != nil {
			// The rangeDelIter changed which indicates that the l.iter moved to the
			// previous sstable. We have to update the tombstone for oldTopLevel as
			// well.
			oldTopLevel--
		}
	} else {
		m.err = l.iter.Error()
		if m.err == nil {
			m.heap.pop()
		}
	}

	// The cached tombstones are only valid for the levels
	// [0,oldTopLevel]. Update the cached tombstones for any levels in the range
	// [oldTopLevel+1,heap[0].index].
	m.initMaxRangeDelIters(oldTopLevel)
}

// isPrevEntryDeleted() starts from the current entry (as the prev entry) and if it is deleted,
// moves the iterators backward as needed and returns true, else it returns false. item is the top
// item in the heap.
func (m *mergingIter) isPrevEntryDeleted(item *mergingIterItem) bool {
	// Look for a range deletion tombstone containing item.key at higher
	// levels (level < item.index). If we find such a range tombstone we know
	// it deletes the key in the current level. Also look for a range
	// deletion at the current level (level == item.index). If we find such a
	// range deletion we need to check whether it is newer than the current
	// entry.
	for level := 0; level <= item.index; level++ {
		l := &m.levels[level]
		if l.rangeDelIter == nil || l.tombstone == nil {
			// If l.tombstone is nil, there are no further tombstones
			// in the current sstable in the current (reverse) iteration
			// direction.
			continue
		}
		if m.heap.cmp(item.key.UserKey, l.tombstone.Start) < 0 {
			// The current key is before the tombstone start key.
			//
			// NB: for the case that this l.rangeDelIter is provided by a levelIter we know that
			// the levelIter must be positioned at a key < item.key. So it is sufficient to seek the
			// current l.rangeDelIter (since any range del iterators that will be provided by the
			// levelIter in the future cannot contain item.key). Also, it is possible that we
			// will encounter parts of the range delete that should be ignored -- we handle that
			// below.
			l.tombstone = keyspan.SeekLE(m.heap.cmp, l.rangeDelIter, item.key.UserKey)
		}
		if l.tombstone == nil {
			continue
		}

		// Reasoning for correctness of untruncated tombstone handling when the untruncated
		// tombstone is at a higher level:
		//
		// The iterator corresponding to this tombstone is still in the heap so it must be
		// positioned <= item.key. Which means the Smallest key bound of the sstable containing this
		// tombstone is <= item.key. So the lower limit of this tombstone cannot have been
		// file-bounds-constrained to > item.key. But it is possible that item.key >= Largest
		// key bound of this sstable, in which case this tombstone should be ignored.
		//
		// Example 1:
		// sstable bounds [c#8, g#12] containing a tombstone [b, i)#7, and key is f#6. The
		// largestUserKey is g, so we know the key is within the file bounds and the tombstone
		// [b, i) covers it.
		//
		// Example 2:
		// Same sstable but the key is g#6. This cannot happen since the [b, i)#7 untruncated
		// tombstone was involved in a compaction which must have had a file to the right of this
		// sstable that is part of the same atomic compaction group for future compactions. That
		// file must have bounds that cover g#6 and this levelIter must be at that file.
		//
		// Example 3:
		// sstable bounds [c#8, g#RangeDelSentinel] containing [b, i)#7 and the key is g#10.
		// This key is not deleted by this tombstone. We need to look at
		// isLargestUserKeyRangeDelSentinel.
		//
		// For a tombstone at the same level as the key, the file bounds are trivially satisfied.

		// Default to within bounds.
		withinLargestSSTableBound := true
		if l.largestUserKey != nil {
			cmpResult := m.heap.cmp(l.largestUserKey, item.key.UserKey)
			withinLargestSSTableBound = cmpResult > 0 || (cmpResult == 0 && !l.isLargestUserKeyRangeDelSentinel)
		}
		if withinLargestSSTableBound && l.tombstone.Contains(m.heap.cmp, item.key.UserKey) && l.tombstone.VisibleAt(m.snapshot) {
			if level < item.index {
				// We could also do m.seekLT(..., level + 1). The levels from
				// [level + 1, item.index) are already before item.key so seeking them may be
				// wasteful.

				// We can seek up to the max of smallestUserKey and tombstone.Start.
				//
				// Using example 1 above, we can seek to the larger of c and b, which is c.
				//
				// Progress argument: We know that the iterator in this file is positioned within
				// its bounds and at a key X < item.key (otherwise it would be the max of the heap).
				// So smallestUserKey <= item.key.UserKey and we already know that
				// l.tombstone.Start <= item.key.UserKey. So the seekKey computed below
				// is <= item.key.UserKey, and since we do a seekLT() we will make backwards
				// progress.
				seekKey := l.tombstone.Start
				if l.smallestUserKey != nil && m.heap.cmp(l.smallestUserKey, seekKey) > 0 {
					seekKey = l.smallestUserKey
				}
				// We set the relative-seek flag. This is important when
				// iterating with lazy combined iteration. If there's a range
				// key between this level's current file and the file the seek
				// will land on, we need to detect it in order to trigger
				// construction of the combined iterator.
				m.seekLT(seekKey, item.index, base.SeekLTFlagsNone.EnableRelativeSeek())
				return true
			}
			if l.tombstone.CoversAt(m.snapshot, item.key.SeqNum()) {
				m.prevEntry(item)
				return true
			}
		}
	}
	return false
}

// Starting from the current entry, finds the first (prev) entry that can be returned.
func (m *mergingIter) findPrevEntry() (*InternalKey, []byte) {
	for m.heap.len() > 0 && m.err == nil {
		item := &m.heap.items[0]
		if m.levels[item.index].isSyntheticIterBoundsKey {
			break
		}
		m.addItemStats(item)
		if m.isPrevEntryDeleted(item) {
			m.stats.PointsCoveredByRangeTombstones++
			continue
		}
		if item.key.Visible1(m.snapshot) &&
			(!m.levels[item.index].isIgnorableBoundaryKey) &&
			(item.key.Kind() != InternalKeyKindRangeDelete || !m.elideRangeTombstones) {
			return &item.key, item.value
		}
		m.prevEntry(item)
	}
	return nil, nil
}

// Seeks levels >= level to >= key. Additionally uses range tombstones to extend the seeks.
func (m *mergingIter) seekGE(key []byte, level int, flags base.SeekGEFlags) {
	// When seeking, we can use tombstones to adjust the key we seek to on each
	// level. Consider the series of range tombstones:
	//
	//	1: a---e
	//	2: d---h
	//	3: g---k
	//	4: j---n
	//	5: m---q
	//
	// If we SeekGE("b") we also find the tombstone that "b" resides within in
	// the first level, which is [a,e). Regardless of whether this tombstone
	// deletes "b" in that level, we know it deletes "b" in all lower levels,
	// so we adjust the search key in the next level to the tombstone end key
	// "e". We then SeekGE("e") in the second level and find the corresponding
	// tombstone [d,h). This process continues and we end up seeking for "h" in
	// the 3rd level, "k" in the 4th level and "n" in the last level.
	//
	// TODO(peter,rangedel): In addition to the above we can delay seeking a
	// level (and any lower levels) when the current iterator position is
	// contained within a range tombstone at a higher level.

	for ; level < len(m.levels); level++ {
		if invariants.Enabled && m.lower != nil && m.heap.cmp(key, m.lower) < 0 {
			m.logger.Fatalf("mergingIter: lower bound violation: %s < %s\n%s", key, m.lower, debug.Stack())
		}

		l := &m.levels[level]
		if m.prefix != nil {
			l.iterKey, l.iterValue = l.iter.SeekPrefixGE(m.prefix, key, flags)
		} else {
			l.iterKey, l.iterValue = l.iter.SeekGE(key, flags)
		}

		// If this level contains overlapping range tombstones, alter the seek
		// key accordingly. Caveat: If we're performing lazy-combined iteration,
		// we cannot alter the seek key: Range tombstones don't delete range
		// keys, and there might exist live range keys within the range
		// tombstone's span that need to be observed to trigger a switch to
		// combined iteration.
		if rangeDelIter := l.rangeDelIter; rangeDelIter != nil &&
			(m.combinedIterState == nil || m.combinedIterState.initialized) {
			// The level has a range-del iterator. Find the tombstone containing
			// the search key.
			//
			// For untruncated tombstones that are possibly file-bounds-constrained, we are using a
			// levelIter which will set smallestUserKey and largestUserKey. Since the levelIter
			// is at this file we know that largestUserKey >= key, so we know that the
			// tombstone we find cannot be file-bounds-constrained in its upper bound to something < key.
			// We do need to compare with smallestUserKey to ensure that the tombstone is not
			// file-bounds-constrained in its lower bound.
			//
			// See the detailed comments in isNextEntryDeleted() on why similar containment and
			// seeking logic is correct. The subtle difference here is that key is a user key,
			// so we can have a sstable with bounds [c#8, i#InternalRangeDelSentinel], and the
			// tombstone is [b, k)#8 and the seek key is i: levelIter.SeekGE(i) will move past
			// this sstable since it realizes the largest key is an InternalRangeDelSentinel.
			l.tombstone = keyspan.SeekGE(m.heap.cmp, rangeDelIter, key)
			if l.tombstone != nil && l.tombstone.VisibleAt(m.snapshot) && l.tombstone.Contains(m.heap.cmp, key) &&
				(l.smallestUserKey == nil || m.heap.cmp(l.smallestUserKey, key) <= 0) {
				// NB: Based on the comment above l.largestUserKey >= key, and based on the
				// containment condition tombstone.End > key, so the assignment to key results
				// in a monotonically non-decreasing key across iterations of this loop.
				//
				// The adjustment of key here can only move it to a larger key. Since
				// the caller of seekGE guaranteed that the original key was greater
				// than or equal to m.lower, the new key will continue to be greater
				// than or equal to m.lower.
				if l.largestUserKey != nil &&
					m.heap.cmp(l.largestUserKey, l.tombstone.End) < 0 {
					// Truncate the tombstone for seeking purposes. Note that this can over-truncate
					// but that is harmless for this seek optimization.
					key = l.largestUserKey
				} else {
					key = l.tombstone.End
				}
			}
		}
	}

	m.initMinHeap()
}

func (m *mergingIter) String() string {
	return "merging"
}

// SeekGE implements base.InternalIterator.SeekGE. Note that SeekGE only checks
// the upper bound. It is up to the caller to ensure that key is greater than
// or equal to the lower bound.
func (m *mergingIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, []byte) {
	m.err = nil
	m.prefix = nil
	m.seekGE(key, 0 /* start level */, flags)
	return m.findNextEntry()
}

// SeekPrefixGE implements base.InternalIterator.SeekPrefixGE. Note that
// SeekPrefixGE only checks the upper bound. It is up to the caller to ensure
// that key is greater than or equal to the lower bound.
func (m *mergingIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, []byte) {
	m.err = nil
	m.prefix = prefix
	m.seekGE(key, 0 /* start level */, flags)
	return m.findNextEntry()
}

// Seeks levels >= level to < key. Additionally uses range tombstones to extend the seeks.
func (m *mergingIter) seekLT(key []byte, level int, flags base.SeekLTFlags) {
	// See the comment in seekGE regarding using tombstones to adjust the seek
	// target per level.
	m.prefix = nil
	for ; level < len(m.levels); level++ {
		if invariants.Enabled && m.upper != nil && m.heap.cmp(key, m.upper) > 0 {
			m.logger.Fatalf("mergingIter: upper bound violation: %s > %s\n%s", key, m.upper, debug.Stack())
		}

		l := &m.levels[level]
		l.iterKey, l.iterValue = l.iter.SeekLT(key, flags)

		// If this level contains overlapping range tombstones, alter the seek
		// key accordingly.
		// Caveat: If we're performing lazy-combined iteration, we cannot alter
		// the seek key: Range tombstones don't delete range keys, and there
		// might exist live range keys within the range tombstone's span that
		// need to be observed to trigger a switch to combined iteration.
		if rangeDelIter := l.rangeDelIter; rangeDelIter != nil &&
			(m.combinedIterState == nil || m.combinedIterState.initialized) {
			// The level has a range-del iterator. Find the tombstone containing
			// the search key.
			//
			// For untruncated tombstones that are possibly file-bounds-constrained we are using a
			// levelIter which will set smallestUserKey and largestUserKey. Since the levelIter
			// is at this file we know that smallestUserKey <= key, so we know that the
			// tombstone we find cannot be file-bounds-constrained in its lower bound to something > key.
			// We do need to compare with largestUserKey to ensure that the tombstone is not
			// file-bounds-constrained in its upper bound.
			//
			// See the detailed comments in isPrevEntryDeleted() on why similar containment and
			// seeking logic is correct.

			// Default to within bounds.
			withinLargestSSTableBound := true
			if l.largestUserKey != nil {
				cmpResult := m.heap.cmp(l.largestUserKey, key)
				withinLargestSSTableBound = cmpResult > 0 || (cmpResult == 0 && !l.isLargestUserKeyRangeDelSentinel)
			}

			l.tombstone = keyspan.SeekLE(m.heap.cmp, rangeDelIter, key)
			if l.tombstone != nil && l.tombstone.VisibleAt(m.snapshot) &&
				l.tombstone.Contains(m.heap.cmp, key) && withinLargestSSTableBound {
				// NB: Based on the comment above l.smallestUserKey <= key, and based
				// on the containment condition tombstone.Start <= key, so the
				// assignment to key results in a monotonically non-increasing key
				// across iterations of this loop.
				//
				// The adjustment of key here can only move it to a smaller key. Since
				// the caller of seekLT guaranteed that the original key was less than
				// or equal to m.upper, the new key will continue to be less than or
				// equal to m.upper.
				if l.smallestUserKey != nil &&
					m.heap.cmp(l.smallestUserKey, l.tombstone.Start) >= 0 {
					// Truncate the tombstone for seeking purposes. Note that this can over-truncate
					// but that is harmless for this seek optimization.
					key = l.smallestUserKey
				} else {
					key = l.tombstone.Start
				}
			}
		}
	}

	m.initMaxHeap()
}

// SeekLT implements base.InternalIterator.SeekLT. Note that SeekLT only checks
// the lower bound. It is up to the caller to ensure that key is less than the
// upper bound.
func (m *mergingIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, []byte) {
	m.err = nil
	m.prefix = nil
	m.seekLT(key, 0 /* start level */, flags)
	return m.findPrevEntry()
}

// First implements base.InternalIterator.First. Note that First only checks
// the upper bound. It is up to the caller to ensure that key is greater than
// or equal to the lower bound (e.g. via a call to SeekGE(lower)).
func (m *mergingIter) First() (*InternalKey, []byte) {
	m.err = nil
	m.prefix = nil
	m.heap.items = m.heap.items[:0]
	for i := range m.levels {
		l := &m.levels[i]
		l.iterKey, l.iterValue = l.iter.First()
	}
	m.initMinHeap()
	return m.findNextEntry()
}

// Last implements base.InternalIterator.Last. Note that Last only checks the
// lower bound. It is up to the caller to ensure that key is less than the
// upper bound (e.g. via a call to SeekLT(upper)).
func (m *mergingIter) Last() (*InternalKey, []byte) {
	m.err = nil
	m.prefix = nil
	for i := range m.levels {
		l := &m.levels[i]
		l.iterKey, l.iterValue = l.iter.Last()
	}
	m.initMaxHeap()
	return m.findPrevEntry()
}

func (m *mergingIter) Next() (*InternalKey, []byte) {
	if m.err != nil {
		return nil, nil
	}

	if m.dir != 1 {
		m.switchToMinHeap()
		return m.findNextEntry()
	}

	if m.heap.len() == 0 {
		return nil, nil
	}

	m.nextEntry(&m.heap.items[0])
	return m.findNextEntry()
}

func (m *mergingIter) Prev() (*InternalKey, []byte) {
	if m.err != nil {
		return nil, nil
	}

	if m.dir != -1 {
		if m.prefix != nil {
			m.err = errors.New("bitalostable: unsupported reverse prefix iteration")
			return nil, nil
		}
		m.switchToMaxHeap()
		return m.findPrevEntry()
	}

	if m.heap.len() == 0 {
		return nil, nil
	}

	m.prevEntry(&m.heap.items[0])
	return m.findPrevEntry()
}

func (m *mergingIter) Error() error {
	if m.heap.len() == 0 || m.err != nil {
		return m.err
	}
	return m.levels[m.heap.items[0].index].iter.Error()
}

func (m *mergingIter) Close() error {
	for i := range m.levels {
		iter := m.levels[i].iter
		if err := iter.Close(); err != nil && m.err == nil {
			m.err = err
		}
		if rangeDelIter := m.levels[i].rangeDelIter; rangeDelIter != nil {
			if err := rangeDelIter.Close(); err != nil && m.err == nil {
				m.err = err
			}
		}
	}
	m.levels = nil
	m.heap.items = m.heap.items[:0]
	return m.err
}

func (m *mergingIter) SetBounds(lower, upper []byte) {
	m.prefix = nil
	m.lower = lower
	m.upper = upper
	for i := range m.levels {
		m.levels[i].iter.SetBounds(lower, upper)
	}
	m.heap.clear()
}

func (m *mergingIter) DebugString() string {
	var buf bytes.Buffer
	sep := ""
	for m.heap.len() > 0 {
		item := m.heap.pop()
		fmt.Fprintf(&buf, "%s%s", sep, item.key)
		sep = " "
	}
	if m.dir == 1 {
		m.initMinHeap()
	} else {
		m.initMaxHeap()
	}
	return buf.String()
}

func (m *mergingIter) ForEachLevelIter(fn func(li *levelIter) bool) {
	for _, iter := range m.levels {
		if li, ok := iter.iter.(*levelIter); ok {
			if done := fn(li); done {
				break
			}
		}
	}
}

func (m *mergingIter) addItemStats(item *mergingIterItem) {
	m.stats.PointCount++
	m.stats.KeyBytes += uint64(len(item.key.UserKey))
	m.stats.ValueBytes += uint64(len(item.value))
}

var _ internalIterator = &mergingIter{}
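
// An illustrative (not prescriptive) sketch of how a mergingIter is typically
// driven; logger, cmp, split, and the child iterators iter0/iter1 are assumed
// to be supplied by the caller, and error handling is condensed:
//
//	var stats base.InternalIteratorStats
//	m := newMergingIter(logger, &stats, cmp, split, iter0, iter1)
//	for key, val := m.First(); key != nil; key, val = m.Next() {
//		// key and val are only valid until the next positioning call.
//		_ = val
//	}
//	err := firstError(m.Error(), m.Close())
//	_ = err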