github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/range_keys.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "context" 9 10 "github.com/cockroachdb/errors" 11 "github.com/cockroachdb/pebble/internal/base" 12 "github.com/cockroachdb/pebble/internal/invariants" 13 "github.com/cockroachdb/pebble/internal/keyspan" 14 "github.com/cockroachdb/pebble/internal/manifest" 15 "github.com/cockroachdb/pebble/sstable" 16 ) 17 18 // constructRangeKeyIter constructs the range-key iterator stack, populating 19 // i.rangeKey.rangeKeyIter with the resulting iterator. 20 func (i *Iterator) constructRangeKeyIter() { 21 i.rangeKey.rangeKeyIter = i.rangeKey.iterConfig.Init( 22 &i.comparer, i.seqNum, i.opts.LowerBound, i.opts.UpperBound, 23 &i.hasPrefix, &i.prefixOrFullSeekKey, false /* internalKeys */, &i.rangeKey.rangeKeyBuffers.internal) 24 25 // If there's an indexed batch with range keys, include it. 26 if i.batch != nil { 27 if i.batch.index == nil { 28 // This isn't an indexed batch. We shouldn't have gotten this far. 29 panic(errors.AssertionFailedf("creating an iterator over an unindexed batch")) 30 } else { 31 // Only include the batch's range key iterator if it has any keys. 32 // NB: This can force reconstruction of the rangekey iterator stack 33 // in SetOptions if subsequently range keys are added. See 34 // SetOptions. 35 if i.batch.countRangeKeys > 0 { 36 i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, i.batchSeqNum) 37 i.rangeKey.iterConfig.AddLevel(&i.batchRangeKeyIter) 38 } 39 } 40 } 41 42 if !i.batchOnlyIter { 43 // Next are the flushables: memtables and large batches. 
44 if i.readState != nil { 45 for j := len(i.readState.memtables) - 1; j >= 0; j-- { 46 mem := i.readState.memtables[j] 47 // We only need to read from memtables which contain sequence numbers older 48 // than seqNum. 49 if logSeqNum := mem.logSeqNum; logSeqNum >= i.seqNum { 50 continue 51 } 52 if rki := mem.newRangeKeyIter(&i.opts); rki != nil { 53 i.rangeKey.iterConfig.AddLevel(rki) 54 } 55 } 56 } 57 58 current := i.version 59 if current == nil { 60 current = i.readState.current 61 } 62 // Next are the file levels: L0 sub-levels followed by lower levels. 63 64 // Add file-specific iterators for L0 files containing range keys. We 65 // maintain a separate manifest.LevelMetadata for each level containing only 66 // files that contain range keys, however we don't compute a separate 67 // L0Sublevels data structure too. 68 // 69 // We first use L0's LevelMetadata to peek and see whether L0 contains any 70 // range keys at all. If it does, we create a range key level iterator per 71 // level that contains range keys using the information from L0Sublevels. 72 // Some sublevels may not contain any range keys, and we need to iterate 73 // through the fileMetadata to determine that. Since L0's file count should 74 // not significantly exceed ~1000 files (see L0CompactionFileThreshold), 75 // this should be okay. 76 if !current.RangeKeyLevels[0].Empty() { 77 // L0 contains at least 1 file containing range keys. 78 // Add level iterators for the L0 sublevels, iterating from newest to 79 // oldest. 
80 for j := len(current.L0SublevelFiles) - 1; j >= 0; j-- { 81 iter := current.L0SublevelFiles[j].Iter() 82 if !containsAnyRangeKeys(iter) { 83 continue 84 } 85 86 li := i.rangeKey.iterConfig.NewLevelIter() 87 li.Init( 88 i.opts.SpanIterOptions(), 89 i.cmp, 90 i.newIterRangeKey, 91 iter.Filter(manifest.KeyTypeRange), 92 manifest.L0Sublevel(j), 93 manifest.KeyTypeRange, 94 ) 95 i.rangeKey.iterConfig.AddLevel(li) 96 } 97 } 98 99 // Add level iterators for the non-empty non-L0 levels. 100 for level := 1; level < len(current.RangeKeyLevels); level++ { 101 if current.RangeKeyLevels[level].Empty() { 102 continue 103 } 104 li := i.rangeKey.iterConfig.NewLevelIter() 105 spanIterOpts := i.opts.SpanIterOptions() 106 li.Init(spanIterOpts, i.cmp, i.newIterRangeKey, current.RangeKeyLevels[level].Iter(), 107 manifest.Level(level), manifest.KeyTypeRange) 108 i.rangeKey.iterConfig.AddLevel(li) 109 } 110 } 111 } 112 113 func containsAnyRangeKeys(iter manifest.LevelIterator) bool { 114 for f := iter.First(); f != nil; f = iter.Next() { 115 if f.HasRangeKeys { 116 return true 117 } 118 } 119 return false 120 } 121 122 // Range key masking 123 // 124 // Pebble iterators may be configured such that range keys with suffixes mask 125 // point keys with lower suffixes. The intended use is implementing a MVCC 126 // delete range operation using range keys, when suffixes are MVCC timestamps. 127 // 128 // To enable masking, the user populates the IterOptions's RangeKeyMasking 129 // field. The Suffix field configures which range keys act as masks. The 130 // intended use is to hold a MVCC read timestamp. When implementing a MVCC 131 // delete range operation, only range keys that are visible at the read 132 // timestamp should be visible. If a range key has a suffix ≤ 133 // RangeKeyMasking.Suffix, it acts as a mask. 134 // 135 // Range key masking is facilitated by the keyspan.InterleavingIter. 
// The interleaving iterator interleaves range keys and point keys during
// combined iteration. During user iteration, the interleaving iterator is
// configured with a keyspan.SpanMask, implemented by the rangeKeyMasking
// struct below. The SpanMask interface defines two methods: SpanChanged and
// SkipPoint.
//
// SpanChanged is used to keep the current mask up-to-date. Whenever the point
// iterator has stepped into or out of the bounds of a range key, the
// interleaving iterator invokes SpanChanged passing the current covering range
// key. The below rangeKeyMasking implementation scans the range keys looking
// for the range key with the largest suffix that's still ≤ the suffix supplied
// to IterOptions.RangeKeyMasking.Suffix (the "read timestamp"). If it finds a
// range key that meets the condition, the range key should act as a mask. The
// span and the relevant range key's suffix are saved.
//
// The above ensures that `rangeKeyMasking.maskActiveSuffix` always contains the
// current masking suffix such that any point keys with lower suffixes should be
// skipped.
//
// There are two ways in which masked point keys are skipped.
//
// 1. Interleaving iterator SkipPoint
//
// Whenever the interleaving iterator encounters a point key that falls within
// the bounds of a range key, it invokes SkipPoint. The interleaving iterator
// guarantees that the SpanChanged method described above has already been
// invoked with the covering range key. The below rangeKeyMasking implementation
// of SkipPoint splits the key into prefix and suffix, compares the suffix to
// the `maskActiveSuffix` updated by SpanChanged and returns true if
// suffix(point) < maskActiveSuffix.
//
// The SkipPoint logic is sufficient to ensure that the Pebble iterator filters
// out all masked point keys. However, it requires the iterator read each masked
// point key. For broad range keys that mask many points, this may be expensive.
//
// 2. Block property filter
//
// For more efficient handling of broad range keys that mask many points, the
// IterOptions.RangeKeyMasking field has an optional Filter option. This Filter
// field takes a superset of the block-property filter interface, adding a
// method to dynamically configure the filter's filtering criteria.
//
// To make use of the Filter option, the user is required to define and
// configure a block-property collector that collects a property containing at
// least the maximum suffix of a key within a block.
//
// When the SpanChanged method described above is invoked, rangeKeyMasking also
// reconfigures the user-provided filter. It invokes a SetSuffix method,
// providing the `maskActiveSuffix`, requesting that from now on the
// block-property filter return Intersects()=false for any properties indicating
// that a block contains exclusively keys with suffixes greater than the
// provided suffix.
//
// Note that unlike other block-property filters, the filter used for masking
// must not apply across the entire keyspace. It must only filter blocks that
// lie within the bounds of the range key that set the mask suffix. To
// accommodate this, rangeKeyMasking implements a special interface:
// sstable.BoundLimitedBlockPropertyFilter. This interface extends the block
// property filter interface with two new methods: KeyIsWithinLowerBound and
// KeyIsWithinUpperBound. The rangeKeyMasking type wraps the user-provided block
// property filter, implementing these two methods and overriding Intersects to
// always return true if there is no active mask.
//
// The logic to ensure that a mask block-property filter is only applied within
// the bounds of the masking range key is subtle.
The interleaving iterator 200 // guarantees that it never invokes SpanChanged until the point iterator is 201 // positioned within the range key. During forward iteration, this guarantees 202 // that any block that a sstable reader might attempt to load contains only keys 203 // greater than or equal to the range key's lower bound. During backward 204 // iteration, it provides the analagous guarantee on the range key's upper 205 // bound. 206 // 207 // The above ensures that an sstable reader only needs to verify that a block 208 // that it skips meets the opposite bound. This is where the 209 // KeyIsWithinLowerBound and KeyIsWithinUpperBound methods are used. When an 210 // sstable iterator is configured with a BoundLimitedBlockPropertyFilter, it 211 // checks for intersection with the block-property filter before every block 212 // load, like ordinary block-property filters. However, if the bound-limited 213 // block property filter indicates that it does NOT intersect, the filter's 214 // relevant KeyIsWithin{Lower,Upper}Bound method is queried, using a block 215 // index separator as the bound. If the method indicates that the provided index 216 // separator does not fall within the range key bounds, the no-intersection 217 // result is ignored, and the block is read. 218 219 type rangeKeyMasking struct { 220 cmp base.Compare 221 split base.Split 222 filter BlockPropertyFilterMask 223 // maskActiveSuffix holds the suffix of a range key currently acting as a 224 // mask, hiding point keys with suffixes greater than it. maskActiveSuffix 225 // is only ever non-nil if IterOptions.RangeKeyMasking.Suffix is non-nil. 226 // maskActiveSuffix is updated whenever the iterator passes over a new range 227 // key. The maskActiveSuffix should only be used if maskSpan is non-nil. 228 // 229 // See SpanChanged. 230 maskActiveSuffix []byte 231 // maskSpan holds the span from which the active mask suffix was extracted. 
232 // The span is used for bounds comparisons, to ensure that a range-key mask 233 // is not applied beyond the bounds of the range key. 234 maskSpan *keyspan.Span 235 parent *Iterator 236 } 237 238 func (m *rangeKeyMasking) init(parent *Iterator, cmp base.Compare, split base.Split) { 239 m.cmp = cmp 240 m.split = split 241 if parent.opts.RangeKeyMasking.Filter != nil { 242 m.filter = parent.opts.RangeKeyMasking.Filter() 243 } 244 m.parent = parent 245 } 246 247 // SpanChanged implements the keyspan.SpanMask interface, used during range key 248 // iteration. 249 func (m *rangeKeyMasking) SpanChanged(s *keyspan.Span) { 250 if s == nil && m.maskSpan == nil { 251 return 252 } 253 m.maskSpan = nil 254 m.maskActiveSuffix = m.maskActiveSuffix[:0] 255 256 // Find the smallest suffix of a range key contained within the Span, 257 // excluding suffixes less than m.opts.RangeKeyMasking.Suffix. 258 if s != nil { 259 m.parent.rangeKey.stale = true 260 if m.parent.opts.RangeKeyMasking.Suffix != nil { 261 for j := range s.Keys { 262 if s.Keys[j].Suffix == nil { 263 continue 264 } 265 if m.cmp(s.Keys[j].Suffix, m.parent.opts.RangeKeyMasking.Suffix) < 0 { 266 continue 267 } 268 if len(m.maskActiveSuffix) == 0 || m.cmp(m.maskActiveSuffix, s.Keys[j].Suffix) > 0 { 269 m.maskSpan = s 270 m.maskActiveSuffix = append(m.maskActiveSuffix[:0], s.Keys[j].Suffix...) 271 } 272 } 273 } 274 } 275 276 if m.maskSpan != nil && m.parent.opts.RangeKeyMasking.Filter != nil { 277 // Update the block-property filter to filter point keys with suffixes 278 // greater than m.maskActiveSuffix. 279 err := m.filter.SetSuffix(m.maskActiveSuffix) 280 if err != nil { 281 m.parent.err = err 282 } 283 } 284 // If no span is active, we leave the inner block-property filter configured 285 // with its existing suffix. That's okay, because Intersects calls are first 286 // evaluated by iteratorRangeKeyState.Intersects, which considers all blocks 287 // as intersecting if there's no active mask. 
288 } 289 290 // SkipPoint implements the keyspan.SpanMask interface, used during range key 291 // iteration. Whenever a point key is covered by a non-empty Span, the 292 // interleaving iterator invokes SkipPoint. This function is responsible for 293 // performing range key masking. 294 // 295 // If a non-nil IterOptions.RangeKeyMasking.Suffix is set, range key masking is 296 // enabled. Masking hides point keys, transparently skipping over the keys. 297 // Whether or not a point key is masked is determined by comparing the point 298 // key's suffix, the overlapping span's keys' suffixes, and the user-configured 299 // IterOption's RangeKeyMasking.Suffix. When configured with a masking threshold 300 // _t_, and there exists a span with suffix _r_ covering a point key with suffix 301 // _p_, and 302 // 303 // _t_ ≤ _r_ < _p_ 304 // 305 // then the point key is elided. Consider the following rendering, where using 306 // integer suffixes with higher integers sort before suffixes with lower 307 // integers, (for example @7 ≤ @6 < @5): 308 // 309 // ^ 310 // @9 | •―――――――――――――――○ [e,m)@9 311 // s 8 | • l@8 312 // u 7 |------------------------------------ @7 RangeKeyMasking.Suffix 313 // f 6 | [h,q)@6 •―――――――――――――――――○ (threshold) 314 // f 5 | • h@5 315 // f 4 | • n@4 316 // i 3 | •―――――――――――○ [f,l)@3 317 // x 2 | • b@2 318 // 1 | 319 // 0 |___________________________________ 320 // a b c d e f g h i j k l m n o p q 321 // 322 // An iterator scanning the entire keyspace with the masking threshold set to @7 323 // will observe point keys b@2 and l@8. The span keys [h,q)@6 and [f,l)@3 serve 324 // as masks, because cmp(@6,@7) ≥ 0 and cmp(@3,@7) ≥ 0. The span key [e,m)@9 325 // does not serve as a mask, because cmp(@9,@7) < 0. 326 // 327 // Although point l@8 falls within the user key bounds of [e,m)@9, [e,m)@9 is 328 // non-masking due to its suffix. The point key l@8 also falls within the user 329 // key bounds of [h,q)@6, but since cmp(@6,@8) ≥ 0, l@8 is unmasked. 
330 // 331 // Invariant: The userKey is within the user key bounds of the span most 332 // recently provided to `SpanChanged`. 333 func (m *rangeKeyMasking) SkipPoint(userKey []byte) bool { 334 m.parent.stats.RangeKeyStats.ContainedPoints++ 335 if m.maskSpan == nil { 336 // No range key is currently acting as a mask, so don't skip. 337 return false 338 } 339 // Range key masking is enabled and the current span includes a range key 340 // that is being used as a mask. (NB: SpanChanged already verified that the 341 // range key's suffix is ≥ RangeKeyMasking.Suffix). 342 // 343 // This point key falls within the bounds of the range key (guaranteed by 344 // the InterleavingIter). Skip the point key if the range key's suffix is 345 // greater than the point key's suffix. 346 pointSuffix := userKey[m.split(userKey):] 347 if len(pointSuffix) > 0 && m.cmp(m.maskActiveSuffix, pointSuffix) < 0 { 348 m.parent.stats.RangeKeyStats.SkippedPoints++ 349 return true 350 } 351 return false 352 } 353 354 // The iteratorRangeKeyState type implements the sstable package's 355 // BoundLimitedBlockPropertyFilter interface in order to use block property 356 // filters for range key masking. The iteratorRangeKeyState implementation wraps 357 // the block-property filter provided in Options.RangeKeyMasking.Filter. 358 // 359 // Using a block-property filter for range-key masking requires limiting the 360 // filter's effect to the bounds of the range key currently acting as a mask. 361 // Consider the range key [a,m)@10, and an iterator positioned just before the 362 // below block, bounded by index separators `c` and `z`: 363 // 364 // c z 365 // x | c@9 c@5 c@1 d@7 e@4 y@4 | ... 366 // iter pos 367 // 368 // The next block cannot be skipped, despite the range key suffix @10 is greater 369 // than all the block's keys' suffixes, because it contains a key (y@4) outside 370 // the bounds of the range key. 
371 // 372 // This extended BoundLimitedBlockPropertyFilter interface adds two new methods, 373 // KeyIsWithinLowerBound and KeyIsWithinUpperBound, for testing whether a 374 // particular block is within bounds. 375 // 376 // The iteratorRangeKeyState implements these new methods by first checking if 377 // the iterator is currently positioned within a range key. If not, the provided 378 // key is considered out-of-bounds. If the iterator is positioned within a range 379 // key, it compares the corresponding range key bound. 380 var _ sstable.BoundLimitedBlockPropertyFilter = (*rangeKeyMasking)(nil) 381 382 // Name implements the limitedBlockPropertyFilter interface defined in the 383 // sstable package by passing through to the user-defined block property filter. 384 func (m *rangeKeyMasking) Name() string { 385 return m.filter.Name() 386 } 387 388 // Intersects implements the limitedBlockPropertyFilter interface defined in the 389 // sstable package by passing the intersection decision to the user-provided 390 // block property filter only if a range key is covering the current iterator 391 // position. 392 func (m *rangeKeyMasking) Intersects(prop []byte) (bool, error) { 393 if m.maskSpan == nil { 394 // No span is actively masking. 395 return true, nil 396 } 397 return m.filter.Intersects(prop) 398 } 399 400 // KeyIsWithinLowerBound implements the limitedBlockPropertyFilter interface 401 // defined in the sstable package. It's used to restrict the masking block 402 // property filter to only applying within the bounds of the active range key. 403 func (m *rangeKeyMasking) KeyIsWithinLowerBound(key []byte) bool { 404 // Invariant: m.maskSpan != nil 405 // 406 // The provided `key` is an inclusive lower bound of the block we're 407 // considering skipping. 408 return m.cmp(m.maskSpan.Start, key) <= 0 409 } 410 411 // KeyIsWithinUpperBound implements the limitedBlockPropertyFilter interface 412 // defined in the sstable package. 
It's used to restrict the masking block 413 // property filter to only applying within the bounds of the active range key. 414 func (m *rangeKeyMasking) KeyIsWithinUpperBound(key []byte) bool { 415 // Invariant: m.maskSpan != nil 416 // 417 // The provided `key` is an *inclusive* upper bound of the block we're 418 // considering skipping, so the range key's end must be strictly greater 419 // than the block bound for the block to be within bounds. 420 return m.cmp(m.maskSpan.End, key) > 0 421 } 422 423 // lazyCombinedIter implements the internalIterator interface, wrapping a 424 // pointIter. It requires the pointIter's the levelIters be configured with 425 // pointers to its combinedIterState. When the levelIter observes a file 426 // containing a range key, the lazyCombinedIter constructs the combined 427 // range+point key iterator stack and switches to it. 428 type lazyCombinedIter struct { 429 // parent holds a pointer to the root *pebble.Iterator containing this 430 // iterator. It's used to mutate the internalIterator in use when switching 431 // to combined iteration. 432 parent *Iterator 433 pointIter internalIterator 434 combinedIterState combinedIterState 435 } 436 437 // combinedIterState encapsulates the current state of combined iteration. 438 // Various low-level iterators (mergingIter, leveliter) hold pointers to the 439 // *pebble.Iterator's combinedIterState. This allows them to check whether or 440 // not they must monitor for files containing range keys (!initialized), or not. 441 // 442 // When !initialized, low-level iterators watch for files containing range keys. 443 // When one is discovered, they set triggered=true and key to the smallest 444 // (forward direction) or largest (reverse direction) range key that's been 445 // observed. 
446 type combinedIterState struct { 447 // key holds the smallest (forward direction) or largest (backward 448 // direction) user key from a range key bound discovered during the iterator 449 // operation that triggered the switch to combined iteration. 450 // 451 // Slices stored here must be stable. This is possible because callers pass 452 // a Smallest/Largest bound from a fileMetadata, which are immutable. A key 453 // slice's bytes must not be overwritten. 454 key []byte 455 triggered bool 456 initialized bool 457 } 458 459 // Assert that *lazyCombinedIter implements internalIterator. 460 var _ internalIterator = (*lazyCombinedIter)(nil) 461 462 // initCombinedIteration is invoked after a pointIter positioning operation 463 // resulted in i.combinedIterState.triggered=true. 464 // 465 // The `dir` parameter is `+1` or `-1` indicating forward iteration or backward 466 // iteration respectively. 467 // 468 // The `pointKey` and `pointValue` parameters provide the new point key-value 469 // pair that the iterator was just positioned to. The combined iterator should 470 // be seeded with this point key-value pair and return the smaller (forward 471 // iteration) or largest (backward iteration) of the two. 472 // 473 // The `seekKey` parameter is non-nil only if the iterator operation that 474 // triggered the switch to combined iteration was a SeekGE, SeekPrefixGE or 475 // SeekLT. It provides the seek key supplied and is used to seek the range-key 476 // iterator using the same key. This is necessary for SeekGE/SeekPrefixGE 477 // operations that land in the middle of a range key and must truncate to the 478 // user-provided seek key. 479 func (i *lazyCombinedIter) initCombinedIteration( 480 dir int8, pointKey *InternalKey, pointValue base.LazyValue, seekKey []byte, 481 ) (*InternalKey, base.LazyValue) { 482 // Invariant: i.parent.rangeKey is nil. 483 // Invariant: !i.combinedIterState.initialized. 
484 if invariants.Enabled { 485 if i.combinedIterState.initialized { 486 panic("pebble: combined iterator already initialized") 487 } 488 if i.parent.rangeKey != nil { 489 panic("pebble: iterator already has a range-key iterator stack") 490 } 491 } 492 493 // We need to determine the key to seek the range key iterator to. If 494 // seekKey is not nil, the user-initiated operation that triggered the 495 // switch to combined iteration was itself a seek, and we can use that key. 496 // Otherwise, a First/Last or relative positioning operation triggered the 497 // switch to combined iteration. 498 // 499 // The levelIter that observed a file containing range keys populated 500 // combinedIterState.key with the smallest (forward) or largest (backward) 501 // range key it observed. If multiple levelIters observed files with range 502 // keys during the same operation on the mergingIter, combinedIterState.key 503 // is the smallest [during forward iteration; largest in reverse iteration] 504 // such key. 505 if seekKey == nil { 506 // Use the levelIter-populated key. 507 seekKey = i.combinedIterState.key 508 509 // We may need to adjust the levelIter-populated seek key to the 510 // surfaced point key. If the key observed is beyond [in the iteration 511 // direction] the current point key, there may still exist a range key 512 // at an earlier key. Consider the following example: 513 // 514 // L5: 000003:[bar.DEL.5, foo.RANGEKEYSET.9] 515 // L6: 000001:[bar.SET.2] 000002:[bax.RANGEKEYSET.8] 516 // 517 // A call to First() seeks the levels to files L5.000003 and L6.000001. 518 // The L5 levelIter observes that L5.000003 contains the range key with 519 // start key `foo`, and triggers a switch to combined iteration, setting 520 // `combinedIterState.key` = `foo`. 521 // 522 // The L6 levelIter did not observe the true first range key 523 // (bax.RANGEKEYSET.8), because it appears in a later sstable. 
When the 524 // combined iterator is initialized, the range key iterator must be 525 // seeked to a key that will find `bax`. To accomplish this, we seek the 526 // key instead to `bar`. It is guaranteed that no range key exists 527 // earlier than `bar`, otherwise a levelIter would've observed it and 528 // set `combinedIterState.key` to its start key. 529 if pointKey != nil { 530 if dir == +1 && i.parent.cmp(i.combinedIterState.key, pointKey.UserKey) > 0 { 531 seekKey = pointKey.UserKey 532 } else if dir == -1 && i.parent.cmp(seekKey, pointKey.UserKey) < 0 { 533 seekKey = pointKey.UserKey 534 } 535 } 536 } 537 538 // An operation on the point iterator observed a file containing range keys, 539 // so we must switch to combined interleaving iteration. First, construct 540 // the range key iterator stack. It must not exist, otherwise we'd already 541 // be performing combined iteration. 542 i.parent.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState) 543 i.parent.rangeKey.init(i.parent.comparer.Compare, i.parent.comparer.Split, &i.parent.opts) 544 i.parent.constructRangeKeyIter() 545 546 // Initialize the Iterator's interleaving iterator. 547 i.parent.rangeKey.iiter.Init( 548 &i.parent.comparer, i.parent.pointIter, i.parent.rangeKey.rangeKeyIter, 549 keyspan.InterleavingIterOpts{ 550 Mask: &i.parent.rangeKeyMasking, 551 LowerBound: i.parent.opts.LowerBound, 552 UpperBound: i.parent.opts.UpperBound, 553 }) 554 555 // Set the parent's primary iterator to point to the combined, interleaving 556 // iterator that's now initialized with our current state. 557 i.parent.iter = &i.parent.rangeKey.iiter 558 i.combinedIterState.initialized = true 559 i.combinedIterState.key = nil 560 561 // All future iterator operations will go directly through the combined 562 // iterator. 563 // 564 // Initialize the interleaving iterator. We pass the point key-value pair so 565 // that the interleaving iterator knows where the point iterator is 566 // positioned. 
Additionally, we pass the seek key to which the range-key 567 // iterator should be seeked in order to initialize its position. 568 // 569 // In the forward direction (invert for backwards), the seek key is a key 570 // guaranteed to find the smallest range key that's greater than the last 571 // key the iterator returned. The range key may be less than pointKey, in 572 // which case the range key will be interleaved next instead of the point 573 // key. 574 if dir == +1 { 575 var prefix []byte 576 if i.parent.hasPrefix { 577 prefix = i.parent.prefixOrFullSeekKey 578 } 579 return i.parent.rangeKey.iiter.InitSeekGE(prefix, seekKey, pointKey, pointValue) 580 } 581 return i.parent.rangeKey.iiter.InitSeekLT(seekKey, pointKey, pointValue) 582 } 583 584 func (i *lazyCombinedIter) SeekGE( 585 key []byte, flags base.SeekGEFlags, 586 ) (*InternalKey, base.LazyValue) { 587 if i.combinedIterState.initialized { 588 return i.parent.rangeKey.iiter.SeekGE(key, flags) 589 } 590 k, v := i.pointIter.SeekGE(key, flags) 591 if i.combinedIterState.triggered { 592 return i.initCombinedIteration(+1, k, v, key) 593 } 594 return k, v 595 } 596 597 func (i *lazyCombinedIter) SeekPrefixGE( 598 prefix, key []byte, flags base.SeekGEFlags, 599 ) (*InternalKey, base.LazyValue) { 600 if i.combinedIterState.initialized { 601 return i.parent.rangeKey.iiter.SeekPrefixGE(prefix, key, flags) 602 } 603 k, v := i.pointIter.SeekPrefixGE(prefix, key, flags) 604 if i.combinedIterState.triggered { 605 return i.initCombinedIteration(+1, k, v, key) 606 } 607 return k, v 608 } 609 610 func (i *lazyCombinedIter) SeekLT( 611 key []byte, flags base.SeekLTFlags, 612 ) (*InternalKey, base.LazyValue) { 613 if i.combinedIterState.initialized { 614 return i.parent.rangeKey.iiter.SeekLT(key, flags) 615 } 616 k, v := i.pointIter.SeekLT(key, flags) 617 if i.combinedIterState.triggered { 618 return i.initCombinedIteration(-1, k, v, key) 619 } 620 return k, v 621 } 622 623 func (i *lazyCombinedIter) First() (*InternalKey, 
base.LazyValue) { 624 if i.combinedIterState.initialized { 625 return i.parent.rangeKey.iiter.First() 626 } 627 k, v := i.pointIter.First() 628 if i.combinedIterState.triggered { 629 return i.initCombinedIteration(+1, k, v, nil) 630 } 631 return k, v 632 } 633 634 func (i *lazyCombinedIter) Last() (*InternalKey, base.LazyValue) { 635 if i.combinedIterState.initialized { 636 return i.parent.rangeKey.iiter.Last() 637 } 638 k, v := i.pointIter.Last() 639 if i.combinedIterState.triggered { 640 return i.initCombinedIteration(-1, k, v, nil) 641 } 642 return k, v 643 } 644 645 func (i *lazyCombinedIter) Next() (*InternalKey, base.LazyValue) { 646 if i.combinedIterState.initialized { 647 return i.parent.rangeKey.iiter.Next() 648 } 649 k, v := i.pointIter.Next() 650 if i.combinedIterState.triggered { 651 return i.initCombinedIteration(+1, k, v, nil) 652 } 653 return k, v 654 } 655 656 func (i *lazyCombinedIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) { 657 if i.combinedIterState.initialized { 658 return i.parent.rangeKey.iiter.NextPrefix(succKey) 659 } 660 k, v := i.pointIter.NextPrefix(succKey) 661 if i.combinedIterState.triggered { 662 return i.initCombinedIteration(+1, k, v, nil) 663 } 664 return k, v 665 } 666 667 func (i *lazyCombinedIter) Prev() (*InternalKey, base.LazyValue) { 668 if i.combinedIterState.initialized { 669 return i.parent.rangeKey.iiter.Prev() 670 } 671 k, v := i.pointIter.Prev() 672 if i.combinedIterState.triggered { 673 return i.initCombinedIteration(-1, k, v, nil) 674 } 675 return k, v 676 } 677 678 func (i *lazyCombinedIter) Error() error { 679 if i.combinedIterState.initialized { 680 return i.parent.rangeKey.iiter.Error() 681 } 682 return i.pointIter.Error() 683 } 684 685 func (i *lazyCombinedIter) Close() error { 686 if i.combinedIterState.initialized { 687 return i.parent.rangeKey.iiter.Close() 688 } 689 return i.pointIter.Close() 690 } 691 692 func (i *lazyCombinedIter) SetBounds(lower, upper []byte) { 693 if 
i.combinedIterState.initialized { 694 i.parent.rangeKey.iiter.SetBounds(lower, upper) 695 return 696 } 697 i.pointIter.SetBounds(lower, upper) 698 } 699 700 func (i *lazyCombinedIter) SetContext(ctx context.Context) { 701 if i.combinedIterState.initialized { 702 i.parent.rangeKey.iiter.SetContext(ctx) 703 return 704 } 705 i.pointIter.SetContext(ctx) 706 } 707 708 func (i *lazyCombinedIter) String() string { 709 if i.combinedIterState.initialized { 710 return i.parent.rangeKey.iiter.String() 711 } 712 return i.pointIter.String() 713 }