github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/reader_iter_single_lvl.go

// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
    "context"
    "fmt"
    "unsafe"

    "github.com/cockroachdb/pebble/internal/base"
    "github.com/cockroachdb/pebble/internal/invariants"
    "github.com/cockroachdb/pebble/objstorage"
    "github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing"
)

// singleLevelIterator iterates over an entire table of data. To seek for a given
// key, it first looks in the index for the block that contains that key, and then
// looks inside that block.
type singleLevelIterator struct {
    ctx context.Context
    cmp Compare
    // Global lower/upper bound for the iterator.
    lower []byte
    upper []byte
    bpfs  *BlockPropertiesFilterer
    // Per-block lower/upper bound. Nil if the bound does not apply to the block
    // because we determined the block lies completely within the bound.
    blockLower []byte
    blockUpper []byte
    reader     *Reader
    // vState will be set iff the iterator is constructed for virtual sstable
    // iteration.
    vState *virtualState
    // endKeyInclusive is set to force the iterator to treat the upper field as
    // inclusive while iterating instead of exclusive.
    endKeyInclusive bool
    index           blockIter
    data            blockIter
    dataRH          objstorage.ReadHandle
    dataRHPrealloc  objstorageprovider.PreallocatedReadHandle
    // dataBH refers to the last data block that the iterator considered
    // loading. It may not actually have loaded the block, due to an error or
    // because it was considered irrelevant.
    dataBH   BlockHandle
    vbReader *valueBlockReader
    // vbRH is the read handle for value blocks, which are in a different
    // part of the sstable than data blocks.
    vbRH         objstorage.ReadHandle
    vbRHPrealloc objstorageprovider.PreallocatedReadHandle
    err          error
    closeHook    func(i Iterator) error
    // stats and iterStats are slightly different. stats is a shared struct
    // supplied from the outside, and represents stats for the whole iterator
    // tree and can be reset from the outside (e.g. when the pebble.Iterator is
    // being reused). It is currently only provided when the iterator tree is
    // rooted at pebble.Iterator. iterStats is this sstable iterator's private
    // stats that are reported to a CategoryStatsCollector when this iterator is
    // closed. More paths are instrumented with this as the
    // CategoryStatsCollector needed for this is provided by the
    // tableCacheContainer (which is more universally used).
    stats      *base.InternalIteratorStats
    iterStats  iterStatsAccumulator
    bufferPool *BufferPool

    // boundsCmp and positionedUsingLatestBounds are for optimizing iteration
    // that uses multiple adjacent bounds. The seek after setting a new bound
    // can use the fact that the iterator is either within the previous bounds
    // or exactly one key before or after the bounds. If the new bounds are
    // after/before the previous bounds, and we are already positioned at a
    // block that is relevant for the new bounds, we can try to first position
    // using Next/Prev (repeatedly) instead of doing a more expensive seek.
    //
    // When there are wide files at higher levels that match the bounds
    // but don't have any data for the bound, we will already be
    // positioned at the key beyond the bounds and won't need to do much
    // work -- given that most data is in L6, such files are likely to
    // dominate the performance of the mergingIter, and may be the main
    // benefit of this performance optimization (of course it also helps
    // when the file that has the data has successive seeks that stay in
    // the same block).
    //
    // Specifically, boundsCmp captures the relationship between the previous
    // and current bounds, if the iterator had been positioned after setting
    // the previous bounds. If it was not positioned, i.e., Seek/First/Last
    // were not called, we don't know where it is positioned and cannot
    // optimize.
    //
    // Example: Bounds moving forward, and iterator exhausted in forward direction.
    //      bounds = [f, h), ^ shows block iterator position
    //  file contents [ a b c d e f g h i j k ]
    //                                ^
    //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
    //  set to +1. SeekGE(j) can use next (the optimization also requires that j
    //  is within the block, but that is not needed for correctness; it merely
    //  limits the optimization to cases where it will actually be an
    //  optimization).
    //
    // Example: Bounds moving forward.
    //      bounds = [f, h), ^ shows block iterator position
    //  file contents [ a b c d e f g h i j k ]
    //                              ^
    //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
    //  set to +1. SeekGE(j) can use next.
    //
    // Example: Bounds moving forward, but iterator not positioned using previous
    //  bounds.
    //      bounds = [f, h), ^ shows block iterator position
    //  file contents [ a b c d e f g h i j k ]
    //                                    ^
    //  new bounds = [i, j). Iterator is at j since it was never positioned using
    //  [f, h). So positionedUsingLatestBounds=false, and boundsCmp is set to 0.
    //  SeekGE(i) will not use next.
    //
    // Example: Bounds moving forward and sparse file
    //      bounds = [f, h), ^ shows block iterator position
    //  file contents [ a z ]
    //                    ^
    //  new bounds = [j, k). Since positionedUsingLatestBounds=true, boundsCmp is
    //  set to +1. SeekGE(j) notices that the iterator is already past j and does
    //  not need to do anything.
    //
    // Similar examples can be constructed for backward iteration.
    //
    // This notion of exactly one key before or after the bounds is not quite
    // true when block properties are used to ignore blocks. In that case we
    // can't stop precisely at the first block that is past the bounds since
    // we are using the index entries to enforce the bounds.
    //
    // e.g. 3 blocks with keys [b, c] [f, g], [i, j, k] with index entries d,
    // h, l. And let the lower bound be k, and we are reverse iterating. If
    // the block [i, j, k] is ignored due to the block interval annotations we
    // do need to move the index to block [f, g] since the index entry for the
    // [i, j, k] block is l which is not less than the lower bound of k. So we
    // have passed the entries i, j.
    //
    // This behavior is harmless since the block property filters are fixed
    // for the lifetime of the iterator so i, j are irrelevant. In addition,
    // the current code will not load the [f, g] block, so the seek
    // optimization that attempts to use Next/Prev does not apply anyway.
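    //
    // (Editor's sketch, not part of the original comment: the usage pattern
    // this optimizes looks like
    //
    //   SetBounds(f, h); SeekGE(f); Next(); ...   // positioned using [f, h)
    //   SetBounds(j, k)                           // boundsCmp becomes +1
    //   SeekGE(j)                                 // may step via Next instead
    //                                             // of re-seeking the index
    //
    // where the caller advances through disjoint, monotonically increasing
    // bound ranges over the same file.)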
    boundsCmp                   int
    positionedUsingLatestBounds bool

    // exhaustedBounds represents whether the iterator is exhausted for
    // iteration by reaching the upper or lower bound. +1 when exhausted
    // the upper bound, -1 when exhausted the lower bound, and 0 when
    // neither. exhaustedBounds is also used for the TrySeekUsingNext
    // optimization in twoLevelIterator and singleLevelIterator. Care should be
    // taken in setting this in twoLevelIterator before calling into
    // singleLevelIterator, given that these two iterators share this field.
    exhaustedBounds int8

    // maybeFilteredKeysSingleLevel indicates whether the last iterator
    // positioning operation may have skipped any data blocks due to
    // block-property filters when positioning the index.
    maybeFilteredKeysSingleLevel bool

    // useFilter specifies whether the filter block in this sstable, if present,
    // should be used for prefix seeks or not. In some cases it is beneficial
    // to skip a filter block even if it exists (eg. if probability of a match
    // is high).
    useFilter              bool
    lastBloomFilterMatched bool

    hideObsoletePoints bool
}

// singleLevelIterator implements the base.InternalIterator interface.
var _ base.InternalIterator = (*singleLevelIterator)(nil)

// init initializes a singleLevelIterator for reading from the table. It is
// synonymous with Reader.NewIter, but allows for reuse of the iterator
// between different Readers.
//
// Note that the lower, upper bounds passed into init have nothing to do with
// virtual sstable bounds. If the virtualState passed in is not nil, then
// virtual sstable bounds will be enforced.
func (i *singleLevelIterator) init(
    ctx context.Context,
    r *Reader,
    v *virtualState,
    lower, upper []byte,
    filterer *BlockPropertiesFilterer,
    useFilter, hideObsoletePoints bool,
    stats *base.InternalIteratorStats,
    categoryAndQoS CategoryAndQoS,
    statsCollector *CategoryStatsCollector,
    rp ReaderProvider,
    bufferPool *BufferPool,
) error {
    if r.err != nil {
        return r.err
    }
    i.iterStats.init(categoryAndQoS, statsCollector)
    indexH, err := r.readIndex(ctx, stats, &i.iterStats)
    if err != nil {
        return err
    }
    if v != nil {
        i.vState = v
        i.endKeyInclusive, lower, upper = v.constrainBounds(lower, upper, false /* endInclusive */)
    }

    i.ctx = ctx
    i.lower = lower
    i.upper = upper
    i.bpfs = filterer
    i.useFilter = useFilter
    i.reader = r
    i.cmp = r.Compare
    i.stats = stats
    i.hideObsoletePoints = hideObsoletePoints
    i.bufferPool = bufferPool
    err = i.index.initHandle(i.cmp, indexH, r.Properties.GlobalSeqNum, false)
    if err != nil {
        // blockIter.Close releases indexH and always returns a nil error
        _ = i.index.Close()
        return err
    }
    i.dataRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.dataRHPrealloc)
    if r.tableFormat >= TableFormatPebblev3 {
        if r.Properties.NumValueBlocks > 0 {
            // NB: we cannot avoid this ~248 byte allocation, since valueBlockReader
            // can outlive the singleLevelIterator due to being embedded in a
            // LazyValue. This consumes ~2% in microbenchmark CPU profiles, but we
            // should only optimize this if it shows up as significant in end-to-end
            // CockroachDB benchmarks, since it is tricky to do so.
            // One possibility is that if many sstable iterators only get
            // positioned at latest versions of keys, and therefore never
            // expose a LazyValue that is separated to their callers, they can
            // put this valueBlockReader into a sync.Pool.
            i.vbReader = &valueBlockReader{
                bpOpen: i,
                rp:     rp,
                vbih:   r.valueBIH,
                stats:  stats,
            }
            i.data.lazyValueHandling.vbr = i.vbReader
            i.vbRH = objstorageprovider.UsePreallocatedReadHandle(ctx, r.readable, &i.vbRHPrealloc)
        }
        i.data.lazyValueHandling.hasValuePrefix = true
    }
    return nil
}

// maybeVerifyKey is a helper that checks whether keys returned from the
// iterator are within the global and virtual bounds.
func (i *singleLevelIterator) maybeVerifyKey(
    iKey *InternalKey, val base.LazyValue,
) (*InternalKey, base.LazyValue) {
    // maybeVerifyKey is only used for virtual sstable iterators.
    if invariants.Enabled && i.vState != nil && iKey != nil {
        key := iKey.UserKey

        uc, vuc := i.cmp(key, i.upper), i.cmp(key, i.vState.upper.UserKey)
        lc, vlc := i.cmp(key, i.lower), i.cmp(key, i.vState.lower.UserKey)

        if (i.vState.upper.IsExclusiveSentinel() && vuc == 0) || (!i.endKeyInclusive && uc == 0) || uc > 0 || vuc > 0 || lc < 0 || vlc < 0 {
            panic(fmt.Sprintf("key: %s out of bounds of singleLevelIterator", key))
        }
    }
    return iKey, val
}

// setupForCompaction sets up the singleLevelIterator for use with compactionIter.
// Currently, it skips readahead ramp-up. It should be called after init is called.
func (i *singleLevelIterator) setupForCompaction() {
    i.dataRH.SetupForCompaction()
    if i.vbRH != nil {
        i.vbRH.SetupForCompaction()
    }
}

func (i *singleLevelIterator) resetForReuse() singleLevelIterator {
    return singleLevelIterator{
        index: i.index.resetForReuse(),
        data:  i.data.resetForReuse(),
    }
}

func (i *singleLevelIterator) initBounds() {
    // Trim the iteration bounds for the current block. We don't have to check
    // the bounds on each iteration if the block is entirely contained within the
    // iteration bounds.
    i.blockLower = i.lower
    if i.blockLower != nil {
        key, _ := i.data.First()
        if key != nil && i.cmp(i.blockLower, key.UserKey) < 0 {
            // The lower-bound is less than the first key in the block. No need
            // to check the lower-bound again for this block.
            i.blockLower = nil
        }
    }
    i.blockUpper = i.upper
    if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
        // The upper-bound is greater than the index key which itself is greater
        // than or equal to every key in the block. No need to check the
        // upper-bound again for this block. Even if blockUpper is inclusive
        // because of upper being inclusive, we can still safely set blockUpper
        // to nil here.
        //
        // TODO(bananabrick): We could also set blockUpper to nil for the >=
        // case, if blockUpper is inclusive.
        i.blockUpper = nil
    }
}

// disableBoundsOpt deterministically disables the bounds-based optimization
// that avoids seeking. It uses the iterator pointer, since we want diversity
// in iterator behavior for the same SetBounds call. Used for tests.
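//
// (Editor's note, an illustrative reading of the code below: the low bit of
// the bound's last byte splits test keys roughly in half, and the top bit of
// a Fibonacci-style hash of the iterator pointer splits iterator instances
// roughly in half, so about one in four (bound, iterator) pairs takes the
// non-optimized seek path, deterministically for a given iterator address.)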
func disableBoundsOpt(bound []byte, ptr uintptr) bool {
    // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
    simpleHash := (11400714819323198485 * uint64(ptr)) >> 63
    return bound[len(bound)-1]&byte(1) == 0 && simpleHash == 0
}

// ensureBoundsOptDeterminism provides a facility for disabling the randomized,
// per-iterator defeat of the bounds optimization performed by disableBoundsOpt,
// for tests that require deterministic iterator behavior. Some unit tests
// examine internal iterator state and require this behavior to be
// deterministic.
var ensureBoundsOptDeterminism bool

// SetBounds implements internalIterator.SetBounds, as documented in the pebble
// package. Note that the upper field is exclusive.
func (i *singleLevelIterator) SetBounds(lower, upper []byte) {
    i.boundsCmp = 0
    if i.vState != nil {
        // If the reader is constructed for a virtual sstable, then we must
        // constrain the bounds of the reader. For physical sstables, the bounds
        // can be wider than the actual sstable's bounds because we won't
        // accidentally expose additional keys as there are no additional keys.
        i.endKeyInclusive, lower, upper = i.vState.constrainBounds(
            lower, upper, false,
        )
    } else {
        // TODO(bananabrick): Figure out the logic here to enable the boundsCmp
        // optimization for virtual sstables.
        if i.positionedUsingLatestBounds {
            if i.upper != nil && lower != nil && i.cmp(i.upper, lower) <= 0 {
                i.boundsCmp = +1
                if invariants.Enabled && !ensureBoundsOptDeterminism &&
                    disableBoundsOpt(lower, uintptr(unsafe.Pointer(i))) {
                    i.boundsCmp = 0
                }
            } else if i.lower != nil && upper != nil && i.cmp(upper, i.lower) <= 0 {
                i.boundsCmp = -1
                if invariants.Enabled && !ensureBoundsOptDeterminism &&
                    disableBoundsOpt(upper, uintptr(unsafe.Pointer(i))) {
                    i.boundsCmp = 0
                }
            }
        }
    }

    i.positionedUsingLatestBounds = false
    i.lower = lower
    i.upper = upper
    i.blockLower = nil
    i.blockUpper = nil
}

func (i *singleLevelIterator) SetContext(ctx context.Context) {
    i.ctx = ctx
}

// loadBlock loads the block at the current index position and leaves i.data
// unpositioned. If unsuccessful, it sets i.err to any error encountered, which
// may be nil if we have simply exhausted the entire table.
func (i *singleLevelIterator) loadBlock(dir int8) loadBlockResult {
    if !i.index.valid() {
        // Ensure the data block iterator is invalidated even if loading of the
        // block fails.
        i.data.invalidate()
        return loadBlockFailed
    }
    // Load the next block.
    v := i.index.value()
    bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
    if i.dataBH == bhp.BlockHandle && i.data.valid() {
        // We're already at the data block we want to load. Reset bounds in case
        // they changed since the last seek, but don't reload the block from cache
        // or disk.
        //
        // It's safe to leave i.data in its original state here, as all callers to
        // loadBlock make an absolute positioning call (i.e. a seek, first, or last)
        // to `i.data` right after loadBlock returns loadBlockOK.
        i.initBounds()
        return loadBlockOK
    }
    // Ensure the data block iterator is invalidated even if loading of the
    // block fails.
    i.data.invalidate()
    i.dataBH = bhp.BlockHandle
    if err != nil {
        i.err = errCorruptIndexEntry
        return loadBlockFailed
    }
    if i.bpfs != nil {
        intersects, err := i.bpfs.intersects(bhp.Props)
        if err != nil {
            i.err = errCorruptIndexEntry
            return loadBlockFailed
        }
        if intersects == blockMaybeExcluded {
            intersects = i.resolveMaybeExcluded(dir)
        }
        if intersects == blockExcluded {
            i.maybeFilteredKeysSingleLevel = true
            return loadBlockIrrelevant
        }
        // blockIntersects
    }
    ctx := objiotracing.WithBlockType(i.ctx, objiotracing.DataBlock)
    block, err := i.reader.readBlock(
        ctx, i.dataBH, nil /* transform */, i.dataRH, i.stats, &i.iterStats, i.bufferPool)
    if err != nil {
        i.err = err
        return loadBlockFailed
    }
    i.err = i.data.initHandle(i.cmp, block, i.reader.Properties.GlobalSeqNum, i.hideObsoletePoints)
    if i.err != nil {
        // The block is partially loaded, and we don't want it to appear valid.
        i.data.invalidate()
        return loadBlockFailed
    }
    i.initBounds()
    return loadBlockOK
}

// readBlockForVBR implements the blockProviderWhenOpen interface for use by
// the valueBlockReader.
func (i *singleLevelIterator) readBlockForVBR(
    h BlockHandle, stats *base.InternalIteratorStats,
) (bufferHandle, error) {
    ctx := objiotracing.WithBlockType(i.ctx, objiotracing.ValueBlock)
    return i.reader.readBlock(ctx, h, nil, i.vbRH, stats, &i.iterStats, i.bufferPool)
}

// resolveMaybeExcluded is invoked when the block-property filterer has found
// that a block is excluded according to its properties but only if its bounds
// fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or `blockExcluded`.
func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
    // TODO(jackson): We could first try comparing to top-level index block's
    // key, and if within bounds avoid per-data block key comparisons.

    // This iterator is configured with a bound-limited block property
    // filter. The bpf determined this block could be excluded from
    // iteration based on the property encoded in the block handle.
    // However, we still need to determine if the block is wholly
    // contained within the filter's key bounds.
    //
    // External guarantees ensure all the block's keys are ≥ the
    // filter's lower bound during forward iteration, and that all the
    // block's keys are < the filter's upper bound during backward
    // iteration. We only need to determine if the opposite bound is
    // also met.
    //
    // The index separator in index.Key() provides an inclusive
    // upper-bound for the data block's keys, guaranteeing that all its
    // keys are ≤ index.Key(). For forward iteration, this is all we
    // need.
    if dir > 0 {
        // Forward iteration.
        if i.bpfs.boundLimitedFilter.KeyIsWithinUpperBound(i.index.Key().UserKey) {
            return blockExcluded
        }
        return blockIntersects
    }

    // Reverse iteration.
    //
    // Because we're iterating in the reverse direction, we don't yet have
    // enough context available to determine if the block is wholly contained
    // within its bounds. This case arises only during backward iteration,
    // because of the way the index is structured.
    //
    // Consider a bound-limited bpf limited to the bounds [b,d), loading the
    // block with separator `c`. During reverse iteration, the guarantee that
    // all the block's keys are < `d` is externally provided, but no guarantee
    // is made on the bpf's lower bound. The separator `c` only provides an
    // inclusive upper bound on the block's keys, indicating that the
    // corresponding block handle points to a block containing only keys ≤ `c`.
    //
    // To establish a lower bound, we step the index backwards to read the
    // previous block's separator, which provides an inclusive lower bound on
    // the original block's keys. Afterwards, we step forward to restore our
    // index position.
    if peekKey, _ := i.index.Prev(); peekKey == nil {
        // The original block points to the first block of this index block. If
        // there's a two-level index, it could potentially provide a lower
        // bound, but the code refactoring necessary to read it doesn't seem
        // worth the payoff. We fall through to loading the block.
    } else if i.bpfs.boundLimitedFilter.KeyIsWithinLowerBound(peekKey.UserKey) {
        // The lower-bound on the original block falls within the filter's
        // bounds, and we can skip the block (after restoring our current index
        // position).
        _, _ = i.index.Next()
        return blockExcluded
    }
    _, _ = i.index.Next()
    return blockIntersects
}

func (i *singleLevelIterator) initBoundsForAlreadyLoadedBlock() {
    if i.data.getFirstUserKey() == nil {
        panic("initBoundsForAlreadyLoadedBlock must not be called on empty or corrupted block")
    }
    i.blockLower = i.lower
    if i.blockLower != nil {
        firstUserKey := i.data.getFirstUserKey()
        if firstUserKey != nil && i.cmp(i.blockLower, firstUserKey) < 0 {
            // The lower-bound is less than the first key in the block. No need
            // to check the lower-bound again for this block.
            i.blockLower = nil
        }
    }
    i.blockUpper = i.upper
    if i.blockUpper != nil && i.cmp(i.blockUpper, i.index.Key().UserKey) > 0 {
        // The upper-bound is greater than the index key which itself is greater
        // than or equal to every key in the block. No need to check the
        // upper-bound again for this block.
        i.blockUpper = nil
    }
}

// numStepsBeforeSeek is the number of times to call Next/Prev in a block
// before giving up and seeking. The value of 4 is arbitrary.
// TODO(sumeer): experiment with dynamic adjustment based on the history of
// seeks for a particular iterator.
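//
// (Editor's note, illustrative and not from the original source: the constant
// caps how much work the step-instead-of-seek optimizations below may waste
// when the target turns out to be far away. A successful "step" path touches
// at most numStepsBeforeSeek keys that are already decoded in the loaded
// block, while the fallback seek pays for a binary search of the index block
// and, possibly, reading a new data block from cache or disk.)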
const numStepsBeforeSeek = 4

func (i *singleLevelIterator) trySeekGEUsingNextWithinBlock(
    key []byte,
) (k *InternalKey, v base.LazyValue, done bool) {
    k, v = i.data.Key(), i.data.value()
    for j := 0; j < numStepsBeforeSeek; j++ {
        curKeyCmp := i.cmp(k.UserKey, key)
        if curKeyCmp >= 0 {
            if i.blockUpper != nil {
                cmp := i.cmp(k.UserKey, i.blockUpper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}, true
                }
            }
            return k, v, true
        }
        k, v = i.data.Next()
        if k == nil {
            break
        }
    }
    return k, v, false
}

func (i *singleLevelIterator) trySeekLTUsingPrevWithinBlock(
    key []byte,
) (k *InternalKey, v base.LazyValue, done bool) {
    k, v = i.data.Key(), i.data.value()
    for j := 0; j < numStepsBeforeSeek; j++ {
        curKeyCmp := i.cmp(k.UserKey, key)
        if curKeyCmp < 0 {
            if i.blockLower != nil && i.cmp(k.UserKey, i.blockLower) < 0 {
                i.exhaustedBounds = -1
                return nil, base.LazyValue{}, true
            }
            return k, v, true
        }
        k, v = i.data.Prev()
        if k == nil {
            break
        }
    }
    return k, v, false
}

func (i *singleLevelIterator) recordOffset() uint64 {
    offset := i.dataBH.Offset
    if i.data.valid() {
        // - i.dataBH.Length/len(i.data.data) is the compression ratio. If
        //   uncompressed, this is 1.
        // - i.data.nextOffset is the uncompressed position of the current record
        //   in the block.
        // - i.dataBH.Offset is the offset of the block in the sstable before
        //   decompression.
        offset += (uint64(i.data.nextOffset) * i.dataBH.Length) / uint64(len(i.data.data))
    } else {
        // Last entry in the block must increment bytes iterated by the size of
        // the block trailer and restart points.
        offset += i.dataBH.Length + blockTrailerLen
    }
    return offset
}

// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package. Note that SeekGE only checks the upper bound. It is up to the
// caller to ensure that key is greater than or equal to the lower bound.
func (i *singleLevelIterator) SeekGE(
    key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
    if i.vState != nil {
        // Callers of SeekGE don't know about virtual sstable bounds, so we may
        // have to internally restrict the bounds.
        //
        // TODO(bananabrick): We can optimize this check away for the level iter
        // if necessary.
        if i.cmp(key, i.lower) < 0 {
            key = i.lower
        }
    }

    if flags.TrySeekUsingNext() {
        // The i.exhaustedBounds comparison indicates that the upper bound was
        // reached. The i.data.isDataInvalidated() indicates that the sstable was
        // exhausted.
        if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && i.err == nil {
            // Already exhausted, so return nil.
            return nil, base.LazyValue{}
        }
        if i.err != nil {
            // The current iterator position cannot be used.
            flags = flags.DisableTrySeekUsingNext()
        }
        // INVARIANT: flags.TrySeekUsingNext() => i.err == nil &&
        // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
        // data-exhausted and bounds-exhausted, as defined earlier, are both
        // false. This makes it safe to clear out i.exhaustedBounds and i.err
        // before calling into seekGEHelper.
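        //
        // (Editor's illustration, not part of the original comment: a caller
        // that previously called SeekGE(b) and now calls SeekGE(d) with
        // TrySeekUsingNext set is asserting that the underlying data has not
        // changed and that d >= b; that is what makes stepping forward from
        // the current position equivalent to a fresh seek.)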
    }

    i.exhaustedBounds = 0
    i.err = nil // clear cached iteration error
    boundsCmp := i.boundsCmp
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0
    i.positionedUsingLatestBounds = true
    return i.seekGEHelper(key, boundsCmp, flags)
}

// seekGEHelper contains the common functionality for SeekGE and SeekPrefixGE.
func (i *singleLevelIterator) seekGEHelper(
    key []byte, boundsCmp int, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
    // Invariant: trySeekUsingNext => !i.data.isDataInvalidated() && i.exhaustedBounds != +1

    // SeekGE performs various step-instead-of-seeking optimizations: eg enabled
    // by trySeekUsingNext, or by monotonically increasing bounds (i.boundsCmp).
    // Care must be taken to ensure that when performing these optimizations and
    // the iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set
    // appropriately. Consider a previous SeekGE that filtered keys from k until
    // the current iterator position.
    //
    // If the previous SeekGE exhausted the iterator, it's possible keys greater
    // than or equal to the current search key were filtered. We must not reuse
    // the current iterator position without remembering the previous value of
    // maybeFilteredKeysSingleLevel.

    var dontSeekWithinBlock bool
    if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
        boundsCmp > 0 && i.cmp(key, i.index.Key().UserKey) <= 0 {
        // Fast-path: The bounds have moved forward and this SeekGE is
        // respecting the lower bound (guaranteed by Iterator). We know that
        // the iterator must already be positioned within or just outside the
        // previous bounds. Therefore it cannot be positioned at a block (or
        // the position within that block) that is ahead of the seek position.
        // However it can be positioned at an earlier block. This fast-path to
        // use Next() on the block is only applied when we are already at the
        // block that the slow-path (the else-clause) would load -- this is
        // the motivation for the i.cmp(key, i.index.Key().UserKey) <= 0
        // predicate.
        i.initBoundsForAlreadyLoadedBlock()
        ikey, val, done := i.trySeekGEUsingNextWithinBlock(key)
        if done {
            return ikey, val
        }
        if ikey == nil {
            // Done with this block.
            dontSeekWithinBlock = true
        }
    } else {
        // Cannot use bounds monotonicity. But may be able to optimize if
        // caller claimed externally known invariant represented by
        // flags.TrySeekUsingNext().
        if flags.TrySeekUsingNext() {
            // seekPrefixGE or SeekGE has already ensured
            // !i.data.isDataInvalidated() && i.exhaustedBounds != +1
            currKey := i.data.Key()
            value := i.data.value()
            less := i.cmp(currKey.UserKey, key) < 0
            // We could be more sophisticated and confirm that the seek
            // position is within the current block before applying this
            // optimization. But there may be some benefit even if it is in
            // the next block, since we can avoid seeking i.index.
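            //
            // (Editor's note: the loop below takes at most numStepsBeforeSeek
            // forward steps. It calls i.Next rather than i.data.Next, so a
            // step may cross into the next block via skipForward, which is
            // still cheaper than re-seeking the index in the common case.)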
            for j := 0; less && j < numStepsBeforeSeek; j++ {
                currKey, value = i.Next()
                if currKey == nil {
                    return nil, base.LazyValue{}
                }
                less = i.cmp(currKey.UserKey, key) < 0
            }
            if !less {
                if i.blockUpper != nil {
                    cmp := i.cmp(currKey.UserKey, i.blockUpper)
                    if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                        i.exhaustedBounds = +1
                        return nil, base.LazyValue{}
                    }
                }
                return currKey, value
            }
        }

        // Slow-path.
        // Since we're re-seeking the iterator, the previous value of
        // maybeFilteredKeysSingleLevel is irrelevant. If we filter out blocks
        // during seeking, loadBlock will set it to true.
        i.maybeFilteredKeysSingleLevel = false

        var ikey *InternalKey
        if ikey, _ = i.index.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey == nil {
            // The target key is greater than any key in the index block.
            // Invalidate the block iterator so that a subsequent call to Prev()
            // will return the last key in the table.
            i.data.invalidate()
            return nil, base.LazyValue{}
        }
        result := i.loadBlock(+1)
        if result == loadBlockFailed {
            return nil, base.LazyValue{}
        }
        if result == loadBlockIrrelevant {
            // Enforce the upper bound here since we don't want to bother moving
            // to the next block if the upper bound is already exceeded. Note
            // that the next block starts with keys >= ikey.UserKey since even
            // though this is the block separator, the same user key can span
            // multiple blocks. If upper is exclusive we use >= below, else
            // we use >.
            if i.upper != nil {
                cmp := i.cmp(ikey.UserKey, i.upper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}
                }
            }
            // Want to skip to the next block.
            dontSeekWithinBlock = true
        }
    }
    if !dontSeekWithinBlock {
        if ikey, val := i.data.SeekGE(key, flags.DisableTrySeekUsingNext()); ikey != nil {
            if i.blockUpper != nil {
                cmp := i.cmp(ikey.UserKey, i.blockUpper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}
                }
            }
            return ikey, val
        }
    }
    return i.skipForward()
}

// SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
// pebble package. Note that SeekPrefixGE only checks the upper bound. It is up
// to the caller to ensure that key is greater than or equal to the lower bound.
func (i *singleLevelIterator) SeekPrefixGE(
    prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
    if i.vState != nil {
        // Callers of SeekPrefixGE aren't aware of virtual sstable bounds, so
        // we may have to internally restrict the bounds.
        //
        // TODO(bananabrick): We can optimize away this check for the level iter
        // if necessary.
        if i.cmp(key, i.lower) < 0 {
            key = i.lower
        }
    }
    return i.seekPrefixGE(prefix, key, flags, i.useFilter)
}

func (i *singleLevelIterator) seekPrefixGE(
    prefix, key []byte, flags base.SeekGEFlags, checkFilter bool,
) (k *InternalKey, value base.LazyValue) {
    // NOTE: prefix is only used for bloom filter checking and not later work in
    // this method. Hence, we can use the existing iterator position if the last
    // SeekPrefixGE did not fail bloom filter matching.
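    //
    // (Editor's sketch, not part of the original comment: a prefix seek first
    // consults the table-level bloom filter, if configured. A negative answer
    // proves no key with that prefix exists in this sstable and lets us skip
    // all index and data block work; a positive answer may be a false
    // positive, so the subsequent seekGEHelper call still has to find, or fail
    // to find, real keys.)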
    err := i.err
    i.err = nil // clear cached iteration error
    if checkFilter && i.reader.tableFilter != nil {
        if !i.lastBloomFilterMatched {
            // Iterator is not positioned based on last seek.
            flags = flags.DisableTrySeekUsingNext()
        }
        i.lastBloomFilterMatched = false
        // Check prefix bloom filter.
        var dataH bufferHandle
        dataH, i.err = i.reader.readFilter(i.ctx, i.stats, &i.iterStats)
        if i.err != nil {
            i.data.invalidate()
            return nil, base.LazyValue{}
        }
        mayContain := i.reader.tableFilter.mayContain(dataH.Get(), prefix)
        dataH.Release()
        if !mayContain {
            // This invalidation may not be necessary for correctness, and may
            // be a place to optimize later by reusing the already loaded
            // block. It was necessary in earlier versions of the code since
            // the caller was allowed to call Next when SeekPrefixGE returned
            // nil. This is no longer allowed.
            i.data.invalidate()
            return nil, base.LazyValue{}
        }
        i.lastBloomFilterMatched = true
    }
    if flags.TrySeekUsingNext() {
        // The i.exhaustedBounds comparison indicates that the upper bound was
        // reached. The i.data.isDataInvalidated() indicates that the sstable was
        // exhausted.
        if (i.exhaustedBounds == +1 || i.data.isDataInvalidated()) && err == nil {
            // Already exhausted, so return nil.
            return nil, base.LazyValue{}
        }
        if err != nil {
            // The current iterator position cannot be used.
            flags = flags.DisableTrySeekUsingNext()
        }
        // INVARIANT: flags.TrySeekUsingNext() => err == nil &&
        // i.exhaustedBounds != +1 && !i.data.isDataInvalidated(). That is,
        // data-exhausted and bounds-exhausted, as defined earlier, are both
        // false. This makes it safe to clear out i.exhaustedBounds and i.err
        // before calling into seekGEHelper.
    }
    // Bloom filter matches, or skipped, so this method will position the
    // iterator.
    i.exhaustedBounds = 0
    boundsCmp := i.boundsCmp
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0
    i.positionedUsingLatestBounds = true
    k, value = i.seekGEHelper(key, boundsCmp, flags)
    return i.maybeVerifyKey(k, value)
}

// virtualLast should only be called if i.vState != nil.
func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
    if i.vState == nil {
        panic("pebble: invalid call to virtualLast")
    }

    // Seek to the first internal key.
    ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
    if i.endKeyInclusive {
        // Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
        // c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
        // c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
        // the last key with userkey == i.upper.
        //
        // TODO(bananabrick): Think about how to improve this. If there are many
        // internal keys with the same user key at the upper bound then this could
        // be slow, but maybe the odds of having many internal keys with the same
        // user key at the upper bound are low.
        for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
            ikey, _ = i.Next()
        }
        return i.Prev()
    }

    // We seeked to the first key >= i.upper.
    return i.Prev()
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package. Note that SeekLT only checks the lower bound.
// It is up to the caller to ensure that key is less than or equal to the
// upper bound.
func (i *singleLevelIterator) SeekLT(
    key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
    if i.vState != nil {
        // Might have to fix upper bound since virtual sstable bounds are not
        // known to callers of SeekLT.
        //
        // TODO(bananabrick): We can optimize away this check for the level iter
        // if necessary.
        cmp := i.cmp(key, i.upper)
        // key == i.upper is fine. We'll do the right thing and return the
        // first internal key with user key < key.
        if cmp > 0 {
            // Return the last key in the virtual sstable.
            return i.virtualLast()
        }
    }

    i.exhaustedBounds = 0
    i.err = nil // clear cached iteration error
    boundsCmp := i.boundsCmp
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0

    // Seeking operations perform various step-instead-of-seeking optimizations:
    // eg by considering monotonically increasing bounds (i.boundsCmp). Care
    // must be taken to ensure that when performing these optimizations and the
    // iterator becomes exhausted, i.maybeFilteredKeysSingleLevel is set
    // appropriately. Consider a previous SeekLT that filtered keys from k
    // until the current iterator position.
    //
    // If the previous SeekLT exhausted the iterator, it's possible keys
    // less than the current search key were filtered. We must not reuse the
    // current iterator position without remembering the previous value of
    // maybeFilteredKeysSingleLevel.

    i.positionedUsingLatestBounds = true

    var dontSeekWithinBlock bool
    if !i.data.isDataInvalidated() && !i.index.isDataInvalidated() && i.data.valid() && i.index.valid() &&
        boundsCmp < 0 && i.cmp(i.data.getFirstUserKey(), key) < 0 {
        // Fast-path: The bounds have moved backward, and this SeekLT is
        // respecting the upper bound (guaranteed by Iterator). We know that
        // the iterator must already be positioned within or just outside the
        // previous bounds. Therefore it cannot be positioned at a block (or
        // the position within that block) that is behind the seek position.
        // However it can be positioned at a later block. This fast-path to
        // use Prev() on the block is only applied when we are already at the
        // block that can satisfy this seek -- this is the motivation for
        // the i.cmp(i.data.firstKey.UserKey, key) < 0 predicate.
        i.initBoundsForAlreadyLoadedBlock()
        ikey, val, done := i.trySeekLTUsingPrevWithinBlock(key)
        if done {
            return ikey, val
        }
        if ikey == nil {
            // Done with this block.
            dontSeekWithinBlock = true
        }
    } else {
        // Slow-path.
        i.maybeFilteredKeysSingleLevel = false
        var ikey *InternalKey

        // NB: If a bound-limited block property filter is configured, it's
        // externally ensured that the filter is disabled (through returning
        // Intersects=false irrespective of the block props provided) during
        // seeks.
        if ikey, _ = i.index.SeekGE(key, base.SeekGEFlagsNone); ikey == nil {
            ikey, _ = i.index.Last()
            if ikey == nil {
                return nil, base.LazyValue{}
            }
        }
        // INVARIANT: ikey != nil.
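        //
        // (Editor's note: we seek the index with SeekGE even though this is a
        // SeekLT, because index entries are separators: the entry for a block
        // is an inclusive upper bound on its keys, so the block that may
        // contain keys < key is the first block whose separator is >= key, or
        // the last block if no such separator exists.)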
        result := i.loadBlock(-1)
        if result == loadBlockFailed {
            return nil, base.LazyValue{}
        }
        if result == loadBlockIrrelevant {
            // Enforce the lower bound here since we don't want to bother moving
            // to the previous block if the lower bound is already exceeded. Note
            // that the previous block starts with keys <= ikey.UserKey since
            // even though this is the current block's separator, the same
            // user key can span multiple blocks.
            if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
                i.exhaustedBounds = -1
                return nil, base.LazyValue{}
            }
            // Want to skip to the previous block.
            dontSeekWithinBlock = true
        }
    }
    if !dontSeekWithinBlock {
        if ikey, val := i.data.SeekLT(key, flags); ikey != nil {
            if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
                i.exhaustedBounds = -1
                return nil, base.LazyValue{}
            }
            return ikey, val
        }
    }
    // The index contains separator keys which may lie between
    // user-keys. Consider the user-keys:
    //
    //   complete
    // ---- new block ---
    //   complexion
    //
    // If these two keys end one block and start the next, the index key may
    // be chosen as "compleu". The SeekGE in the index block will then point
    // us to the block containing "complexion". If this happens, we want the
    // last key from the previous data block.
    return i.maybeVerifyKey(i.skipBackward())
}

// First implements internalIterator.First, as documented in the pebble
// package. Note that First only checks the upper bound. It is up to the caller
// to ensure that key is greater than or equal to the lower bound (e.g. via a
// call to SeekGE(lower)).
func (i *singleLevelIterator) First() (*InternalKey, base.LazyValue) {
    // If the iterator was created on a virtual sstable, we will SeekGE to the
    // lower bound instead of using First, because First does not respect
    // bounds.
    if i.vState != nil {
        return i.SeekGE(i.lower, base.SeekGEFlagsNone)
    }

    if i.lower != nil {
        panic("singleLevelIterator.First() used despite lower bound")
    }
    i.positionedUsingLatestBounds = true
    i.maybeFilteredKeysSingleLevel = false

    return i.firstInternal()
}

// firstInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning.
func (i *singleLevelIterator) firstInternal() (*InternalKey, base.LazyValue) {
    i.exhaustedBounds = 0
    i.err = nil // clear cached iteration error
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0

    var ikey *InternalKey
    if ikey, _ = i.index.First(); ikey == nil {
        i.data.invalidate()
        return nil, base.LazyValue{}
    }
    result := i.loadBlock(+1)
    if result == loadBlockFailed {
        return nil, base.LazyValue{}
    }
    if result == loadBlockOK {
        if ikey, val := i.data.First(); ikey != nil {
            if i.blockUpper != nil {
                cmp := i.cmp(ikey.UserKey, i.blockUpper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}
                }
            }
            return ikey, val
        }
        // Else fall through to skipForward.
    } else {
        // result == loadBlockIrrelevant.
        // Enforce the upper bound here since we don't want to bother moving to
        // the next block if the upper bound is already exceeded. Note that the
        // next block starts with keys >= ikey.UserKey since even though this
        // is the block separator, the same user key can span multiple blocks.
        // If upper is exclusive we use >= below, else we use >.
        if i.upper != nil {
            cmp := i.cmp(ikey.UserKey, i.upper)
            if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                i.exhaustedBounds = +1
                return nil, base.LazyValue{}
            }
        }
        // Else fall through to skipForward.
    }

    return i.skipForward()
}

// Last implements internalIterator.Last, as documented in the pebble
// package. Note that Last only checks the lower bound. It is up to the caller
// to ensure that key is less than the upper bound (e.g. via a call to
// SeekLT(upper)).
func (i *singleLevelIterator) Last() (*InternalKey, base.LazyValue) {
    if i.vState != nil {
        return i.virtualLast()
    }

    if i.upper != nil {
        panic("singleLevelIterator.Last() used despite upper bound")
    }
    i.positionedUsingLatestBounds = true
    i.maybeFilteredKeysSingleLevel = false
    return i.lastInternal()
}

// lastInternal is a helper used for absolute positioning in a single-level
// index file, or for positioning in the second-level index in a two-level
// index file. For the latter, one cannot make any claims about absolute
// positioning.
func (i *singleLevelIterator) lastInternal() (*InternalKey, base.LazyValue) {
    i.exhaustedBounds = 0
    i.err = nil // clear cached iteration error
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0

    var ikey *InternalKey
    if ikey, _ = i.index.Last(); ikey == nil {
        i.data.invalidate()
        return nil, base.LazyValue{}
    }
    result := i.loadBlock(-1)
    if result == loadBlockFailed {
        return nil, base.LazyValue{}
    }
    if result == loadBlockOK {
        if ikey, val := i.data.Last(); ikey != nil {
            if i.blockLower != nil && i.cmp(ikey.UserKey, i.blockLower) < 0 {
                i.exhaustedBounds = -1
                return nil, base.LazyValue{}
            }
            return ikey, val
        }
        // Else fall through to skipBackward.
    } else {
        // result == loadBlockIrrelevant. Enforce the lower bound here since
        // we don't want to bother moving to the previous block if the lower
        // bound is already exceeded. Note that the previous block starts with
        // keys <= ikey.UserKey since even though this is the current block's
        // separator, the same user key can span multiple blocks.
        if i.lower != nil && i.cmp(ikey.UserKey, i.lower) < 0 {
            i.exhaustedBounds = -1
            return nil, base.LazyValue{}
        }
    }

    return i.skipBackward()
}

// Next implements internalIterator.Next, as documented in the pebble
// package.
// Note: compactionIterator.Next mirrors the implementation of Iterator.Next
// due to performance. Keep the two in sync.
func (i *singleLevelIterator) Next() (*InternalKey, base.LazyValue) {
    if i.exhaustedBounds == +1 {
        panic("Next called even though exhausted upper bound")
    }
    i.exhaustedBounds = 0
    i.maybeFilteredKeysSingleLevel = false
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0

    if i.err != nil {
        // TODO(jackson): Can this case be turned into a panic? Once an error is
        // encountered, the iterator must be re-seeked.
        return nil, base.LazyValue{}
    }
    if key, val := i.data.Next(); key != nil {
        if i.blockUpper != nil {
            cmp := i.cmp(key.UserKey, i.blockUpper)
            if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                i.exhaustedBounds = +1
                return nil, base.LazyValue{}
            }
        }
        return key, val
    }
    return i.skipForward()
}

// NextPrefix implements (base.InternalIterator).NextPrefix.
func (i *singleLevelIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
    if i.exhaustedBounds == +1 {
        panic("NextPrefix called even though exhausted upper bound")
    }
    i.exhaustedBounds = 0
    i.maybeFilteredKeysSingleLevel = false
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0
    if i.err != nil {
        // TODO(jackson): Can this case be turned into a panic? Once an error is
        // encountered, the iterator must be re-seeked.
        return nil, base.LazyValue{}
    }
    if key, val := i.data.NextPrefix(succKey); key != nil {
        if i.blockUpper != nil {
            cmp := i.cmp(key.UserKey, i.blockUpper)
            if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                i.exhaustedBounds = +1
                return nil, base.LazyValue{}
            }
        }
        return key, val
    }
    // Did not find prefix in the existing data block. This is the slow-path
    // where we effectively seek the iterator.
    var ikey *InternalKey
    // The key is likely to be in the next data block, so try one step.
    if ikey, _ = i.index.Next(); ikey == nil {
        // The target key is greater than any key in the index block.
        // Invalidate the block iterator so that a subsequent call to Prev()
        // will return the last key in the table.
        i.data.invalidate()
        return nil, base.LazyValue{}
    }
    if i.cmp(succKey, ikey.UserKey) > 0 {
        // Not in the next data block, so seek the index.
        if ikey, _ = i.index.SeekGE(succKey, base.SeekGEFlagsNone); ikey == nil {
            // The target key is greater than any key in the index block.
            // Invalidate the block iterator so that a subsequent call to Prev()
            // will return the last key in the table.
            i.data.invalidate()
            return nil, base.LazyValue{}
        }
    }
    result := i.loadBlock(+1)
    if result == loadBlockFailed {
        return nil, base.LazyValue{}
    }
    if result == loadBlockIrrelevant {
        // Enforce the upper bound here since we don't want to bother moving
        // to the next block if the upper bound is already exceeded. Note that
        // the next block starts with keys >= ikey.UserKey since even though
        // this is the block separator, the same user key can span multiple
        // blocks. If upper is exclusive we use >= below, else we use >.
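        //
        // (Editor's example: with an exclusive upper bound f and a block
        // separator ikey.UserKey == f, cmp == 0 and we mark the upper bound
        // exhausted without loading the next block; with an inclusive upper
        // bound, cmp == 0 is still in bounds and only cmp > 0 exhausts.)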
        if i.upper != nil {
            cmp := i.cmp(ikey.UserKey, i.upper)
            if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                i.exhaustedBounds = +1
                return nil, base.LazyValue{}
            }
        }
    } else if key, val := i.data.SeekGE(succKey, base.SeekGEFlagsNone); key != nil {
        if i.blockUpper != nil {
            cmp := i.cmp(key.UserKey, i.blockUpper)
            if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                i.exhaustedBounds = +1
                return nil, base.LazyValue{}
            }
        }
        return i.maybeVerifyKey(key, val)
    }

    return i.skipForward()
}

// Prev implements internalIterator.Prev, as documented in the pebble
// package.
func (i *singleLevelIterator) Prev() (*InternalKey, base.LazyValue) {
    if i.exhaustedBounds == -1 {
        panic("Prev called even though exhausted lower bound")
    }
    i.exhaustedBounds = 0
    i.maybeFilteredKeysSingleLevel = false
    // Seek optimization only applies until iterator is first positioned after SetBounds.
    i.boundsCmp = 0

    if i.err != nil {
        return nil, base.LazyValue{}
    }
    if key, val := i.data.Prev(); key != nil {
        if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
            i.exhaustedBounds = -1
            return nil, base.LazyValue{}
        }
        return key, val
    }
    return i.skipBackward()
}

func (i *singleLevelIterator) skipForward() (*InternalKey, base.LazyValue) {
    for {
        var key *InternalKey
        if key, _ = i.index.Next(); key == nil {
            i.data.invalidate()
            break
        }
        result := i.loadBlock(+1)
        if result != loadBlockOK {
            if i.err != nil {
                break
            }
            if result == loadBlockFailed {
                // We checked that i.index was at a valid entry, so
                // loadBlockFailed could not have happened due to i.index
                // being exhausted, and must be due to an error.
                panic("loadBlock should not have failed with no error")
            }
            // result == loadBlockIrrelevant. Enforce the upper bound here
            // since we don't want to bother moving to the next block if the
            // upper bound is already exceeded. Note that the next block starts
            // with keys >= key.UserKey since even though this is the block
            // separator, the same user key can span multiple blocks. If upper
            // is exclusive we use >= below, else we use >.
            if i.upper != nil {
                cmp := i.cmp(key.UserKey, i.upper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}
                }
            }
            continue
        }
        if key, val := i.data.First(); key != nil {
            if i.blockUpper != nil {
                cmp := i.cmp(key.UserKey, i.blockUpper)
                if (!i.endKeyInclusive && cmp >= 0) || cmp > 0 {
                    i.exhaustedBounds = +1
                    return nil, base.LazyValue{}
                }
            }
            return i.maybeVerifyKey(key, val)
        }
    }
    return nil, base.LazyValue{}
}

func (i *singleLevelIterator) skipBackward() (*InternalKey, base.LazyValue) {
    for {
        var key *InternalKey
        if key, _ = i.index.Prev(); key == nil {
            i.data.invalidate()
            break
        }
        result := i.loadBlock(-1)
        if result != loadBlockOK {
            if i.err != nil {
                break
            }
            if result == loadBlockFailed {
                // We checked that i.index was at a valid entry, so
                // loadBlockFailed could not have happened due to i.index
                // being exhausted, and must be due to an error.
                panic("loadBlock should not have failed with no error")
            }
            // result == loadBlockIrrelevant.
            // Enforce the lower bound here since we don't want to bother
            // moving to the previous block if the lower bound is already
            // exceeded. Note that the previous block starts with keys <=
            // key.UserKey since even though this is the current block's
            // separator, the same user key can span multiple blocks.
            if i.lower != nil && i.cmp(key.UserKey, i.lower) < 0 {
                i.exhaustedBounds = -1
                return nil, base.LazyValue{}
            }
            continue
        }
        key, val := i.data.Last()
        if key == nil {
            return nil, base.LazyValue{}
        }
        if i.blockLower != nil && i.cmp(key.UserKey, i.blockLower) < 0 {
            i.exhaustedBounds = -1
            return nil, base.LazyValue{}
        }
        return i.maybeVerifyKey(key, val)
    }
    return nil, base.LazyValue{}
}

// Error implements internalIterator.Error, as documented in the pebble
// package.
func (i *singleLevelIterator) Error() error {
    if err := i.data.Error(); err != nil {
        return err
    }
    return i.err
}

// MaybeFilteredKeys may be called when an iterator is exhausted to indicate
// whether or not the last positioning method may have skipped any keys due to
// block-property filters.
func (i *singleLevelIterator) MaybeFilteredKeys() bool {
    return i.maybeFilteredKeysSingleLevel
}

// SetCloseHook sets a function that will be called when the iterator is
// closed.
func (i *singleLevelIterator) SetCloseHook(fn func(i Iterator) error) {
    i.closeHook = fn
}

func firstError(err0, err1 error) error {
    if err0 != nil {
        return err0
    }
    return err1
}

// Close implements internalIterator.Close, as documented in the pebble
// package.
func (i *singleLevelIterator) Close() error {
    i.iterStats.close()
    var err error
    if i.closeHook != nil {
        err = firstError(err, i.closeHook(i))
    }
    err = firstError(err, i.data.Close())
    err = firstError(err, i.index.Close())
    if i.dataRH != nil {
        err = firstError(err, i.dataRH.Close())
        i.dataRH = nil
    }
    err = firstError(err, i.err)
    if i.bpfs != nil {
        releaseBlockPropertiesFilterer(i.bpfs)
    }
    if i.vbReader != nil {
        i.vbReader.close()
    }
    if i.vbRH != nil {
        err = firstError(err, i.vbRH.Close())
        i.vbRH = nil
    }
    *i = i.resetForReuse()
    singleLevelIterPool.Put(i)
    return err
}

func (i *singleLevelIterator) String() string {
    if i.vState != nil {
        return i.vState.fileNum.String()
    }
    return i.reader.fileNum.String()
}
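// scanForward is an editor-added sketch, not part of the original file. It
// shows how a caller drives any base.InternalIterator -- including
// *singleLevelIterator -- through a forward scan. It assumes the caller has
// already constructed the iterator (e.g. via Reader.NewIter) with the desired
// [lower, upper) bounds; per the SeekGE contract above, the iterator itself
// only enforces the upper bound, so the caller seeks to the lower bound.
func scanForward(
    it base.InternalIterator, lower []byte, visit func(*base.InternalKey, base.LazyValue),
) error {
    // Position at the first key >= lower, then step until the iterator is
    // exhausted. A nil key means either bounds/data exhaustion or an error.
    for k, v := it.SeekGE(lower, base.SeekGEFlagsNone); k != nil; k, v = it.Next() {
        visit(k, v)
    }
    // Error() distinguishes clean exhaustion from an iteration error.
    return it.Error()
}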