github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/range_keys.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package bitalostable 6 7 import ( 8 "github.com/zuoyebang/bitalostable/internal/base" 9 "github.com/zuoyebang/bitalostable/internal/invariants" 10 "github.com/zuoyebang/bitalostable/internal/keyspan" 11 "github.com/zuoyebang/bitalostable/internal/manifest" 12 "github.com/zuoyebang/bitalostable/sstable" 13 ) 14 15 // constructRangeKeyIter constructs the range-key iterator stack, populating 16 // i.rangeKey.rangeKeyIter with the resulting iterator. 17 func (i *Iterator) constructRangeKeyIter() { 18 i.rangeKey.rangeKeyIter = i.rangeKey.iterConfig.Init( 19 &i.comparer, i.seqNum, i.opts.LowerBound, i.opts.UpperBound, 20 &i.hasPrefix, &i.prefixOrFullSeekKey) 21 22 // If there's an indexed batch with range keys, include it. 23 if i.batch != nil { 24 if i.batch.index == nil { 25 i.rangeKey.iterConfig.AddLevel(newErrorKeyspanIter(ErrNotIndexed)) 26 } else { 27 // Only include the batch's range key iterator if it has any keys. 28 // NB: This can force reconstruction of the rangekey iterator stack 29 // in SetOptions if subsequently range keys are added. See 30 // SetOptions. 31 if i.batch.countRangeKeys > 0 { 32 i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, i.batchSeqNum) 33 i.rangeKey.iterConfig.AddLevel(&i.batchRangeKeyIter) 34 } 35 } 36 } 37 38 // Next are the flushables: memtables and large batches. 39 for j := len(i.readState.memtables) - 1; j >= 0; j-- { 40 mem := i.readState.memtables[j] 41 // We only need to read from memtables which contain sequence numbers older 42 // than seqNum. 
43 if logSeqNum := mem.logSeqNum; logSeqNum >= i.seqNum { 44 continue 45 } 46 if rki := mem.newRangeKeyIter(&i.opts); rki != nil { 47 i.rangeKey.iterConfig.AddLevel(rki) 48 } 49 } 50 51 current := i.readState.current 52 // Next are the file levels: L0 sub-levels followed by lower levels. 53 // 54 // Add file-specific iterators for L0 files containing range keys. This is less 55 // efficient than using levelIters for sublevels of L0 files containing 56 // range keys, but range keys are expected to be sparse anyway, reducing the 57 // cost benefit of maintaining a separate L0Sublevels instance for range key 58 // files and then using it here. 59 // 60 // NB: We iterate L0's files in reverse order. They're sorted by 61 // LargestSeqNum ascending, and we need to add them to the merging iterator 62 // in LargestSeqNum descending to preserve the merging iterator's invariants 63 // around Key Trailer order. 64 iter := current.RangeKeyLevels[0].Iter() 65 for f := iter.Last(); f != nil; f = iter.Prev() { 66 spanIterOpts := &keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} 67 spanIter, err := i.newIterRangeKey(f, spanIterOpts) 68 if err != nil { 69 i.rangeKey.iterConfig.AddLevel(&errorKeyspanIter{err: err}) 70 continue 71 } 72 i.rangeKey.iterConfig.AddLevel(spanIter) 73 } 74 75 // Add level iterators for the non-empty non-L0 levels. 76 for level := 1; level < len(current.RangeKeyLevels); level++ { 77 if current.RangeKeyLevels[level].Empty() { 78 continue 79 } 80 li := i.rangeKey.iterConfig.NewLevelIter() 81 spanIterOpts := keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters} 82 li.Init(spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), 83 manifest.Level(level), i.opts.logger, manifest.KeyTypeRange) 84 i.rangeKey.iterConfig.AddLevel(li) 85 } 86 } 87 88 // Range key masking 89 // 90 // Pebble iterators may be configured such that range keys with suffixes mask 91 // point keys with lower suffixes. 
// The intended use is implementing a MVCC
// delete range operation using range keys, when suffixes are MVCC timestamps.
//
// To enable masking, the user populates the IterOptions's RangeKeyMasking
// field. The Suffix field configures which range keys act as masks. The
// intended use is to hold a MVCC read timestamp. When implementing a MVCC
// delete range operation, only range keys that are visible at the read
// timestamp should be visible. If a range key has a suffix ≤
// RangeKeyMasking.Suffix, it acts as a mask.
//
// Range key masking is facilitated by the keyspan.InterleavingIter. The
// interleaving iterator interleaves range keys and point keys during combined
// iteration. During user iteration, the interleaving iterator is configured
// with a keyspan.SpanMask, implemented by the rangeKeyMasking struct below.
// The SpanMask interface defines two methods: SpanChanged and SkipPoint.
//
// SpanChanged is used to keep the current mask up-to-date. Whenever the point
// iterator has stepped into or out of the bounds of a range key, the
// interleaving iterator invokes SpanChanged passing the current covering range
// key. The below rangeKeyMasking implementation scans the range keys looking
// for the range key with the largest suffix that's still ≤ the suffix supplied
// to IterOptions.RangeKeyMasking.Suffix (the "read timestamp"). If it finds a
// range key that meets the condition, the range key should act as a mask. The
// span and the relevant range key's suffix are saved.
//
// The above ensures that `rangeKeyMasking.maskActiveSuffix` always contains the
// current masking suffix such that any point keys with lower suffixes should be
// skipped.
//
// There are two ways in which masked point keys are skipped.
//
// 1. Interleaving iterator SkipPoint
//
// Whenever the interleaving iterator encounters a point key that falls within
// the bounds of a range key, it invokes SkipPoint. The interleaving iterator
// guarantees that the SpanChanged method described above has already been
// invoked with the covering range key. The below rangeKeyMasking implementation
// of SkipPoint splits the key into prefix and suffix, compares the suffix to
// the `maskActiveSuffix` updated by SpanChanged and returns true if
// suffix(point) < maskActiveSuffix.
//
// The SkipPoint logic is sufficient to ensure that the Pebble iterator filters
// out all masked point keys. However, it requires the iterator read each masked
// point key. For broad range keys that mask many points, this may be expensive.
//
// 2. Block property filter
//
// For more efficient handling of broad range keys that mask many points, the
// IterOptions.RangeKeyMasking field has an optional Filter option. This Filter
// field takes a superset of the block-property filter interface, adding a
// method to dynamically configure the filter's filtering criteria.
//
// To make use of the Filter option, the user is required to define and
// configure a block-property collector that collects a property containing at
// least the maximum suffix of a key within a block.
//
// When the SpanChanged method described above is invoked, rangeKeyMasking also
// reconfigures the user-provided filter. It invokes a SetSuffix method,
// providing the `maskActiveSuffix`, requesting that from now on the
// block-property filter return Intersects()=false for any properties indicating
// that a block contains exclusively keys with suffixes greater than the
// provided suffix.
//
// Note that unlike other block-property filters, the filter used for masking
// must not apply across the entire keyspace. It must only filter blocks that
// lie within the bounds of the range key that set the mask suffix. To
// accommodate this, rangeKeyMasking implements a special interface:
// sstable.BoundLimitedBlockPropertyFilter. This interface extends the block
// property filter interface with two new methods: KeyIsWithinLowerBound and
// KeyIsWithinUpperBound. The rangeKeyMasking type wraps the user-provided block
// property filter, implementing these two methods and overriding Intersects to
// always return true if there is no active mask.
//
// The logic to ensure that a mask block-property filter is only applied within
// the bounds of the masking range key is subtle. The interleaving iterator
// guarantees that it never invokes SpanChanged until the point iterator is
// positioned within the range key. During forward iteration, this guarantees
// that any block that a sstable reader might attempt to load contains only keys
// greater than or equal to the range key's lower bound. During backward
// iteration, it provides the analogous guarantee on the range key's upper
// bound.
//
// The above ensures that an sstable reader only needs to verify that a block
// that it skips meets the opposite bound. This is where the
// KeyIsWithinLowerBound and KeyIsWithinUpperBound methods are used. When an
// sstable iterator is configured with a BoundLimitedBlockPropertyFilter, it
// checks for intersection with the block-property filter before every block
// load, like ordinary block-property filters. However, if the bound-limited
// block property filter indicates that it does NOT intersect, the filter's
// relevant KeyIsWithin{Lower,Upper}Bound method is queried, using a block
// index separator as the bound.
If the method indicates that the provided index 182 // separator does not fall within the range key bounds, the no-intersection 183 // result is ignored, and the block is read. 184 185 type rangeKeyMasking struct { 186 cmp base.Compare 187 split base.Split 188 filter BlockPropertyFilterMask 189 // maskActiveSuffix holds the suffix of a range key currently acting as a 190 // mask, hiding point keys with suffixes greater than it. maskActiveSuffix 191 // is only ever non-nil if IterOptions.RangeKeyMasking.Suffix is non-nil. 192 // maskActiveSuffix is updated whenever the iterator passes over a new range 193 // key. The maskActiveSuffix should only be used if maskSpan is non-nil. 194 // 195 // See SpanChanged. 196 maskActiveSuffix []byte 197 // maskSpan holds the span from which the active mask suffix was extracted. 198 // The span is used for bounds comparisons, to ensure that a range-key mask 199 // is not applied beyond the bounds of the range key. 200 maskSpan *keyspan.Span 201 parent *Iterator 202 } 203 204 func (m *rangeKeyMasking) init(parent *Iterator, cmp base.Compare, split base.Split) { 205 m.cmp = cmp 206 m.split = split 207 if parent.opts.RangeKeyMasking.Filter != nil { 208 m.filter = parent.opts.RangeKeyMasking.Filter() 209 } 210 m.parent = parent 211 } 212 213 // SpanChanged implements the keyspan.SpanMask interface, used during range key 214 // iteration. 215 func (m *rangeKeyMasking) SpanChanged(s *keyspan.Span) { 216 if s == nil && m.maskSpan == nil { 217 return 218 } 219 m.maskSpan = nil 220 m.maskActiveSuffix = m.maskActiveSuffix[:0] 221 222 // Find the smallest suffix of a range key contained within the Span, 223 // excluding suffixes less than m.opts.RangeKeyMasking.Suffix. 
224 if s != nil { 225 m.parent.rangeKey.stale = true 226 if m.parent.opts.RangeKeyMasking.Suffix != nil { 227 for j := range s.Keys { 228 if s.Keys[j].Suffix == nil { 229 continue 230 } 231 if m.cmp(s.Keys[j].Suffix, m.parent.opts.RangeKeyMasking.Suffix) < 0 { 232 continue 233 } 234 if len(m.maskActiveSuffix) == 0 || m.cmp(m.maskActiveSuffix, s.Keys[j].Suffix) > 0 { 235 m.maskSpan = s 236 m.maskActiveSuffix = append(m.maskActiveSuffix[:0], s.Keys[j].Suffix...) 237 } 238 } 239 } 240 } 241 242 if m.maskSpan != nil && m.parent.opts.RangeKeyMasking.Filter != nil { 243 // Update the block-property filter to filter point keys with suffixes 244 // greater than m.maskActiveSuffix. 245 err := m.filter.SetSuffix(m.maskActiveSuffix) 246 if err != nil { 247 m.parent.err = err 248 } 249 } 250 // If no span is active, we leave the inner block-property filter configured 251 // with its existing suffix. That's okay, because Intersects calls are first 252 // evaluated by iteratorRangeKeyState.Intersects, which considers all blocks 253 // as intersecting if there's no active mask. 254 } 255 256 // SkipPoint implements the keyspan.SpanMask interface, used during range key 257 // iteration. Whenever a point key is covered by a non-empty Span, the 258 // interleaving iterator invokes SkipPoint. This function is responsible for 259 // performing range key masking. 260 // 261 // If a non-nil IterOptions.RangeKeyMasking.Suffix is set, range key masking is 262 // enabled. Masking hides point keys, transparently skipping over the keys. 263 // Whether or not a point key is masked is determined by comparing the point 264 // key's suffix, the overlapping span's keys' suffixes, and the user-configured 265 // IterOption's RangeKeyMasking.Suffix. When configured with a masking threshold 266 // _t_, and there exists a span with suffix _r_ covering a point key with suffix 267 // _p_, and 268 // 269 // _t_ ≤ _r_ < _p_ 270 // 271 // then the point key is elided. 
Consider the following rendering, where using 272 // integer suffixes with higher integers sort before suffixes with lower 273 // integers, (for example @7 ≤ @6 < @5): 274 // 275 // ^ 276 // @9 | •―――――――――――――――○ [e,m)@9 277 // s 8 | • l@8 278 // u 7 |------------------------------------ @7 RangeKeyMasking.Suffix 279 // f 6 | [h,q)@6 •―――――――――――――――――○ (threshold) 280 // f 5 | • h@5 281 // f 4 | • n@4 282 // i 3 | •―――――――――――○ [f,l)@3 283 // x 2 | • b@2 284 // 1 | 285 // 0 |___________________________________ 286 // a b c d e f g h i j k l m n o p q 287 // 288 // An iterator scanning the entire keyspace with the masking threshold set to @7 289 // will observe point keys b@2 and l@8. The span keys [h,q)@6 and [f,l)@3 serve 290 // as masks, because cmp(@6,@7) ≥ 0 and cmp(@3,@7) ≥ 0. The span key [e,m)@9 291 // does not serve as a mask, because cmp(@9,@7) < 0. 292 // 293 // Although point l@8 falls within the user key bounds of [e,m)@9, [e,m)@9 is 294 // non-masking due to its suffix. The point key l@8 also falls within the user 295 // key bounds of [h,q)@6, but since cmp(@6,@8) ≥ 0, l@8 is unmasked. 296 // 297 // Invariant: The userKey is within the user key bounds of the span most 298 // recently provided to `SpanChanged`. 299 func (m *rangeKeyMasking) SkipPoint(userKey []byte) bool { 300 if m.maskSpan == nil { 301 // No range key is currently acting as a mask, so don't skip. 302 return false 303 } 304 // Range key masking is enabled and the current span includes a range key 305 // that is being used as a mask. (NB: SpanChanged already verified that the 306 // range key's suffix is ≥ RangeKeyMasking.Suffix). 307 // 308 // This point key falls within the bounds of the range key (guaranteed by 309 // the InterleavingIter). Skip the point key if the range key's suffix is 310 // greater than the point key's suffix. 
311 pointSuffix := userKey[m.split(userKey):] 312 return len(pointSuffix) > 0 && m.cmp(m.maskActiveSuffix, pointSuffix) < 0 313 } 314 315 // The iteratorRangeKeyState type implements the sstable package's 316 // BoundLimitedBlockPropertyFilter interface in order to use block property 317 // filters for range key masking. The iteratorRangeKeyState implementation wraps 318 // the block-property filter provided in Options.RangeKeyMasking.Filter. 319 // 320 // Using a block-property filter for range-key masking requires limiting the 321 // filter's effect to the bounds of the range key currently acting as a mask. 322 // Consider the range key [a,m)@10, and an iterator positioned just before the 323 // below block, bounded by index separators `c` and `z`: 324 // 325 // c z 326 // x | c@9 c@5 c@1 d@7 e@4 y@4 | ... 327 // iter pos 328 // 329 // The next block cannot be skipped, despite the range key suffix @10 is greater 330 // than all the block's keys' suffixes, because it contains a key (y@4) outside 331 // the bounds of the range key. 332 // 333 // This extended BoundLimitedBlockPropertyFilter interface adds two new methods, 334 // KeyIsWithinLowerBound and KeyIsWithinUpperBound, for testing whether a 335 // particular block is within bounds. 336 // 337 // The iteratorRangeKeyState implements these new methods by first checking if 338 // the iterator is currently positioned within a range key. If not, the provided 339 // key is considered out-of-bounds. If the iterator is positioned within a range 340 // key, it compares the corresponding range key bound. 341 var _ sstable.BoundLimitedBlockPropertyFilter = (*rangeKeyMasking)(nil) 342 343 // Name implements the limitedBlockPropertyFilter interface defined in the 344 // sstable package by passing through to the user-defined block property filter. 
345 func (m *rangeKeyMasking) Name() string { 346 return m.filter.Name() 347 } 348 349 // Intersects implements the limitedBlockPropertyFilter interface defined in the 350 // sstable package by passing the intersection decision to the user-provided 351 // block property filter only if a range key is covering the current iterator 352 // position. 353 func (m *rangeKeyMasking) Intersects(prop []byte) (bool, error) { 354 if m.maskSpan == nil { 355 // No span is actively masking. 356 return true, nil 357 } 358 return m.filter.Intersects(prop) 359 } 360 361 // KeyIsWithinLowerBound implements the limitedBlockPropertyFilter interface 362 // defined in the sstable package. It's used to restrict the masking block 363 // property filter to only applying within the bounds of the active range key. 364 func (m *rangeKeyMasking) KeyIsWithinLowerBound(ik *InternalKey) bool { 365 // Invariant: m.maskSpan != nil 366 // 367 // The provided `ik` is an inclusive lower bound of the block we're 368 // considering skipping. 369 return m.cmp(m.maskSpan.Start, ik.UserKey) <= 0 370 } 371 372 // KeyIsWithinUpperBound implements the limitedBlockPropertyFilter interface 373 // defined in the sstable package. It's used to restrict the masking block 374 // property filter to only applying within the bounds of the active range key. 375 func (m *rangeKeyMasking) KeyIsWithinUpperBound(ik *InternalKey) bool { 376 // Invariant: m.maskSpan != nil 377 // 378 // The provided `ik` is an *inclusive* upper bound of the block we're 379 // considering skipping, so the range key's end must be strictly greater 380 // than the block bound for the block to be within bounds. 381 return m.cmp(m.maskSpan.End, ik.UserKey) > 0 382 } 383 384 // lazyCombinedIter implements the internalIterator interface, wrapping a 385 // pointIter. It requires the pointIter's the levelIters be configured with 386 // pointers to its combinedIterState. 
// When the levelIter observes a file
// containing a range key, the lazyCombinedIter constructs the combined
// range+point key iterator stack and switches to it.
type lazyCombinedIter struct {
	// parent holds a pointer to the root *bitalostable.Iterator containing this
	// iterator. It's used to mutate the internalIterator in use when switching
	// to combined iteration.
	parent *Iterator
	// pointIter is the wrapped point-key iterator. Every operation is
	// forwarded to it until combined iteration has been initialized.
	pointIter internalIterator
	// combinedIterState records whether a switch to combined iteration has
	// been triggered and whether it has been carried out.
	combinedIterState combinedIterState
}

// combinedIterState encapsulates the current state of combined iteration.
// Various low-level iterators (mergingIter, levelIter) hold pointers to the
// *bitalostable.Iterator's combinedIterState. This allows them to check
// whether they must monitor for files containing range keys (!initialized) or
// not.
//
// When !initialized, low-level iterators watch for files containing range keys.
// When one is discovered, they set triggered=true and key to the smallest
// (forward direction) or largest (reverse direction) range key that's been
// observed.
type combinedIterState struct {
	// key holds the smallest (forward direction) or largest (backward
	// direction) user key from a range key bound discovered during the iterator
	// operation that triggered the switch to combined iteration.
	//
	// Slices stored here must be stable. This is possible because callers pass
	// a Smallest/Largest bound from a fileMetadata, which are immutable. A key
	// slice's bytes must not be overwritten.
	key []byte
	// triggered is set by a low-level iterator when it observes a file
	// containing range keys.
	triggered bool
	// initialized is set once the combined iterator stack has been constructed
	// and the Iterator has switched over to it.
	initialized bool
}

// Assert that *lazyCombinedIter implements internalIterator.
var _ internalIterator = (*lazyCombinedIter)(nil)

// initCombinedIteration is invoked after a pointIter positioning operation
// resulted in i.combinedIterState.triggered=true.
425 // 426 // The `dir` parameter is `+1` or `-1` indicating forward iteration or backward 427 // iteration respectively. 428 // 429 // The `pointKey` and `pointValue` parameters provide the new point key-value 430 // pair that the iterator was just positioned to. The combined iterator should 431 // be seeded with this point key-value pair and return the smaller (forward 432 // iteration) or largest (backward iteration) of the two. 433 // 434 // The `seekKey` parameter is non-nil only if the iterator operation that 435 // triggered the switch to combined iteration was a SeekGE, SeekPrefixGE or 436 // SeekLT. It provides the seek key supplied and is used to seek the range-key 437 // iterator using the same key. This is necessary for SeekGE/SeekPrefixGE 438 // operations that land in the middle of a range key and must truncate to the 439 // user-provided seek key. 440 func (i *lazyCombinedIter) initCombinedIteration( 441 dir int8, pointKey *InternalKey, pointValue []byte, seekKey []byte, 442 ) (*InternalKey, []byte) { 443 // Invariant: i.parent.rangeKey is nil. 444 // Invariant: !i.combinedIterState.initialized. 445 if invariants.Enabled { 446 if i.combinedIterState.initialized { 447 panic("bitalostable: combined iterator already initialized") 448 } 449 if i.parent.rangeKey != nil { 450 panic("bitalostable: iterator already has a range-key iterator stack") 451 } 452 } 453 454 // We need to determine the key to seek the range key iterator to. If 455 // seekKey is not nil, the user-initiated operation that triggered the 456 // switch to combined iteration was itself a seek, and we can use that key. 457 // Otherwise, a First/Last or relative positioning operation triggered the 458 // switch to combined iteration. 459 // 460 // The levelIter that observed a file containing range keys populated 461 // combinedIterState.key with the smallest (forward) or largest (backward) 462 // range key it observed. 
If multiple levelIters observed files with range 463 // keys during the same operation on the mergingIter, combinedIterState.key 464 // is the smallest [during forward iteration; largest in reverse iteration] 465 // such key. 466 if seekKey == nil { 467 // Use the levelIter-populated key. 468 seekKey = i.combinedIterState.key 469 470 // We may need to adjust the levelIter-populated seek key to the 471 // surfaced point key. If the key observed is beyond [in the iteration 472 // direction] the current point key, there may still exist a range key 473 // at an earlier key. Consider the following example: 474 // 475 // L5: 000003:[bar.DEL.5, foo.RANGEKEYSET.9] 476 // L6: 000001:[bar.SET.2] 000002:[bax.RANGEKEYSET.8] 477 // 478 // A call to First() seeks the levels to files L5.000003 and L6.000001. 479 // The L5 levelIter observes that L5.000003 contains the range key with 480 // start key `foo`, and triggers a switch to combined iteration, setting 481 // `combinedIterState.key` = `foo`. 482 // 483 // The L6 levelIter did not observe the true first range key 484 // (bax.RANGEKEYSET.8), because it appears in a later sstable. When the 485 // combined iterator is initialized, the range key iterator must be 486 // seeked to a key that will find `bax`. To accomplish this, we seek the 487 // key instead to `bar`. It is guaranteed that no range key exists 488 // earlier than `bar`, otherwise a levelIter would've observed it and 489 // set `combinedIterState.key` to its start key. 
490 if pointKey != nil { 491 if dir == +1 && i.parent.cmp(i.combinedIterState.key, pointKey.UserKey) > 0 { 492 seekKey = pointKey.UserKey 493 } else if dir == -1 && i.parent.cmp(seekKey, pointKey.UserKey) < 0 { 494 seekKey = pointKey.UserKey 495 } 496 } 497 } 498 499 if i.parent.hasPrefix { 500 si := i.parent.comparer.Split(seekKey) 501 if i.parent.cmp(seekKey[:si], i.parent.prefixOrFullSeekKey) > 0 { 502 // The earliest possible range key has a start key with a prefix 503 // greater than the current iteration prefix. There's no need to 504 // switch to combined iteration, because there are not any range 505 // keys within the bounds of the prefix. Additionally, using a seek 506 // key that is outside the scope of the prefix can violate 507 // invariants within the range key iterator stack. Optimizations 508 // that exit early due to exhausting the prefix may result in 509 // `seekKey` being larger than the next range key's start key. 510 // 511 // See the testdata/rangekeys test case associated with #1893. 512 i.combinedIterState = combinedIterState{initialized: false} 513 return pointKey, pointValue 514 } 515 } 516 517 // An operation on the point iterator observed a file containing range keys, 518 // so we must switch to combined interleaving iteration. First, construct 519 // the range key iterator stack. It must not exist, otherwise we'd already 520 // be performing combined iteration. 521 i.parent.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState) 522 i.parent.rangeKey.init(i.parent.comparer.Compare, i.parent.comparer.Split, &i.parent.opts) 523 i.parent.constructRangeKeyIter() 524 525 // Initialize the Iterator's interleaving iterator. 
526 i.parent.rangeKey.iiter.Init( 527 &i.parent.comparer, i.parent.pointIter, i.parent.rangeKey.rangeKeyIter, 528 &i.parent.rangeKeyMasking, i.parent.opts.LowerBound, i.parent.opts.UpperBound) 529 530 // Set the parent's primary iterator to point to the combined, interleaving 531 // iterator that's now initialized with our current state. 532 i.parent.iter = &i.parent.rangeKey.iiter 533 i.combinedIterState.initialized = true 534 i.combinedIterState.key = nil 535 536 // All future iterator operations will go directly through the combined 537 // iterator. 538 // 539 // Initialize the interleaving iterator. We pass the point key-value pair so 540 // that the interleaving iterator knows where the point iterator is 541 // positioned. Additionally, we pass the seek key to which the range-key 542 // iterator should be seeked in order to initialize its position. 543 // 544 // In the forward direction (invert for backwards), the seek key is a key 545 // guaranteed to find the smallest range key that's greater than the last 546 // key the iterator returned. The range key may be less than pointKey, in 547 // which case the range key will be interleaved next instead of the point 548 // key. 
549 if dir == +1 { 550 var prefix []byte 551 if i.parent.hasPrefix { 552 prefix = i.parent.prefixOrFullSeekKey 553 } 554 return i.parent.rangeKey.iiter.InitSeekGE(prefix, seekKey, pointKey, pointValue) 555 } 556 return i.parent.rangeKey.iiter.InitSeekLT(seekKey, pointKey, pointValue) 557 } 558 559 func (i *lazyCombinedIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, []byte) { 560 if i.combinedIterState.initialized { 561 return i.parent.rangeKey.iiter.SeekGE(key, flags) 562 } 563 k, v := i.pointIter.SeekGE(key, flags) 564 if i.combinedIterState.triggered { 565 return i.initCombinedIteration(+1, k, v, key) 566 } 567 return k, v 568 } 569 570 func (i *lazyCombinedIter) SeekPrefixGE( 571 prefix, key []byte, flags base.SeekGEFlags, 572 ) (*InternalKey, []byte) { 573 if i.combinedIterState.initialized { 574 return i.parent.rangeKey.iiter.SeekPrefixGE(prefix, key, flags) 575 } 576 k, v := i.pointIter.SeekPrefixGE(prefix, key, flags) 577 if i.combinedIterState.triggered { 578 return i.initCombinedIteration(+1, k, v, key) 579 } 580 return k, v 581 } 582 583 func (i *lazyCombinedIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, []byte) { 584 if i.combinedIterState.initialized { 585 return i.parent.rangeKey.iiter.SeekLT(key, flags) 586 } 587 k, v := i.pointIter.SeekLT(key, flags) 588 if i.combinedIterState.triggered { 589 return i.initCombinedIteration(-1, k, v, key) 590 } 591 return k, v 592 } 593 594 func (i *lazyCombinedIter) First() (*InternalKey, []byte) { 595 if i.combinedIterState.initialized { 596 return i.parent.rangeKey.iiter.First() 597 } 598 k, v := i.pointIter.First() 599 if i.combinedIterState.triggered { 600 return i.initCombinedIteration(+1, k, v, nil) 601 } 602 return k, v 603 } 604 605 func (i *lazyCombinedIter) Last() (*InternalKey, []byte) { 606 if i.combinedIterState.initialized { 607 return i.parent.rangeKey.iiter.Last() 608 } 609 k, v := i.pointIter.Last() 610 if i.combinedIterState.triggered { 611 return 
i.initCombinedIteration(-1, k, v, nil) 612 } 613 return k, v 614 } 615 616 func (i *lazyCombinedIter) Next() (*InternalKey, []byte) { 617 if i.combinedIterState.initialized { 618 return i.parent.rangeKey.iiter.Next() 619 } 620 k, v := i.pointIter.Next() 621 if i.combinedIterState.triggered { 622 return i.initCombinedIteration(+1, k, v, nil) 623 } 624 return k, v 625 } 626 627 func (i *lazyCombinedIter) Prev() (*InternalKey, []byte) { 628 if i.combinedIterState.initialized { 629 return i.parent.rangeKey.iiter.Prev() 630 } 631 k, v := i.pointIter.Prev() 632 if i.combinedIterState.triggered { 633 return i.initCombinedIteration(-1, k, v, nil) 634 } 635 return k, v 636 } 637 638 func (i *lazyCombinedIter) Error() error { 639 if i.combinedIterState.initialized { 640 return i.parent.rangeKey.iiter.Error() 641 } 642 return i.pointIter.Error() 643 } 644 645 func (i *lazyCombinedIter) Close() error { 646 if i.combinedIterState.initialized { 647 return i.parent.rangeKey.iiter.Close() 648 } 649 return i.pointIter.Close() 650 } 651 652 func (i *lazyCombinedIter) SetBounds(lower, upper []byte) { 653 if i.combinedIterState.initialized { 654 i.parent.rangeKey.iiter.SetBounds(lower, upper) 655 return 656 } 657 i.pointIter.SetBounds(lower, upper) 658 } 659 660 func (i *lazyCombinedIter) String() string { 661 if i.combinedIterState.initialized { 662 return i.parent.rangeKey.iiter.String() 663 } 664 return i.pointIter.String() 665 }