github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/iterator.go

// Copyright 2011 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package bitalostable

import (
	"bytes"
	"io"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/redact"
	"github.com/zuoyebang/bitalostable/internal/base"
	"github.com/zuoyebang/bitalostable/internal/fastrand"
	"github.com/zuoyebang/bitalostable/internal/humanize"
	"github.com/zuoyebang/bitalostable/internal/invariants"
	"github.com/zuoyebang/bitalostable/internal/keyspan"
	"github.com/zuoyebang/bitalostable/internal/manifest"
	"github.com/zuoyebang/bitalostable/internal/rangekey"
	"github.com/zuoyebang/bitalostable/sstable"
)

// iterPos describes the state of the internal iterator, in terms of whether it
// is at the position returned to the user (cur), one ahead of the position
// returned (next for forward iteration and prev for reverse iteration). The cur
// position is split into two states, for forward and reverse iteration, since
// we need to differentiate for switching directions.
//
// There is subtlety in what is considered the current position of the Iterator.
// The internal iterator exposes a sequence of internal keys. There is not
// always a single internalIterator position corresponding to the position
// returned to the user. Consider the example:
//
//	a.MERGE.9 a.MERGE.8 a.MERGE.7 a.SET.6 b.DELETE.9 b.DELETE.5 b.SET.4
//	\                                                          /
//	  \                 Iterator.Key() = 'a'                 /
//
// The Iterator exposes one valid position at user key 'a' and the two exhausted
// positions at the beginning and end of iteration. The underlying
// internalIterator contains 7 valid positions and 2 exhausted positions.
//
// Iterator positioning methods must set iterPos to iterPosCur{Forward,Backward}
// iff the user key at the current internalIterator position equals the
// Iterator.Key returned to the user. This guarantees that a call to nextUserKey
// or prevUserKey will advance to the next or previous iterator position.
// iterPosCur{Forward,Backward} does not make any guarantee about the internal
// iterator position among internal keys with matching user keys, and it will
// vary subtly depending on the particular key kinds encountered. In the above
// example, the iterator returning 'a' may set iterPosCurForward if the internal
// iterator is positioned at any of a.MERGE.9, a.MERGE.8, a.MERGE.7 or a.SET.6.
//
// When setting iterPos to iterPosNext or iterPosPrev, the internal iterator
// must be advanced to the first internalIterator position at a user key greater
// (iterPosNext) or less (iterPosPrev) than the key returned to the user. An
// internalIterator position that's !Valid() must also be considered greater or
// less, depending on the direction of iteration, than the last valid Iterator
// position.
type iterPos int8

const (
	iterPosCurForward iterPos = 0
	iterPosNext       iterPos = 1
	iterPosPrev       iterPos = -1
	iterPosCurReverse iterPos = -2

	// For limited iteration. When the iterator is at iterPosCurForwardPaused
	//   - Next*() call should behave as if the internal iterator is already
	//     at next (akin to iterPosNext).
72 // - Prev*() call should behave as if the internal iterator is at the 73 // current key (akin to iterPosCurForward). 74 // 75 // Similar semantics apply to CurReversePaused. 76 iterPosCurForwardPaused iterPos = 2 77 iterPosCurReversePaused iterPos = -3 78 ) 79 80 // Approximate gap in bytes between samples of data read during iteration. 81 // This is multiplied with a default ReadSamplingMultiplier of 1 << 4 to yield 82 // 1 << 20 (1MB). The 1MB factor comes from: 83 // https://github.com/zuoyebang/bitalostable/issues/29#issuecomment-494477985 84 const readBytesPeriod uint64 = 1 << 16 85 86 var errReversePrefixIteration = errors.New("bitalostable: unsupported reverse prefix iteration") 87 88 // IteratorMetrics holds per-iterator metrics. These do not change over the 89 // lifetime of the iterator. 90 type IteratorMetrics struct { 91 // The read amplification experienced by this iterator. This is the sum of 92 // the memtables, the L0 sublevels and the non-empty Ln levels. Higher read 93 // amplification generally results in slower reads, though allowing higher 94 // read amplification can also result in faster writes. 95 ReadAmp int 96 } 97 98 // IteratorStatsKind describes the two kind of iterator stats. 99 type IteratorStatsKind int8 100 101 const ( 102 // InterfaceCall represents calls to Iterator. 103 InterfaceCall IteratorStatsKind = iota 104 // InternalIterCall represents calls by Iterator to its internalIterator. 105 InternalIterCall 106 // NumStatsKind is the number of kinds, and is used for array sizing. 107 NumStatsKind 108 ) 109 110 // IteratorStats contains iteration stats. 111 type IteratorStats struct { 112 // ForwardSeekCount includes SeekGE, SeekPrefixGE, First. 113 ForwardSeekCount [NumStatsKind]int 114 // ReverseSeek includes SeekLT, Last. 115 ReverseSeekCount [NumStatsKind]int 116 // ForwardStepCount includes Next. 117 ForwardStepCount [NumStatsKind]int 118 // ReverseStepCount includes Prev. 119 ReverseStepCount [NumStatsKind]int 120 InternalStats InternalIteratorStats 121 } 122 123 var _ redact.SafeFormatter = &IteratorStats{} 124 125 // InternalIteratorStats contains miscellaneous stats produced by internal 126 // iterators. 127 type InternalIteratorStats = base.InternalIteratorStats 128 129 // Iterator iterates over a DB's key/value pairs in key order. 130 // 131 // An iterator must be closed after use, but it is not necessary to read an 132 // iterator until exhaustion. 133 // 134 // An iterator is not goroutine-safe, but it is safe to use multiple iterators 135 // concurrently, with each in a dedicated goroutine. 136 // 137 // It is also safe to use an iterator concurrently with modifying its 138 // underlying DB, if that DB permits modification. However, the resultant 139 // key/value pairs are not guaranteed to be a consistent snapshot of that DB 140 // at a particular point in time. 141 // 142 // If an iterator encounters an error during any operation, it is stored by 143 // the Iterator and surfaced through the Error method. All absolute 144 // positioning methods (eg, SeekLT, SeekGT, First, Last, etc) reset any 145 // accumulated error before positioning. All relative positioning methods (eg, 146 // Next, Prev) return without advancing if the iterator has an accumulated 147 // error. 148 type Iterator struct { 149 opts IterOptions 150 merge Merge 151 comparer base.Comparer 152 iter internalIterator 153 pointIter internalIterator 154 readState *readState 155 // rangeKey holds iteration state specific to iteration over range keys. 
156 // The range key field may be nil if the Iterator has never been configured 157 // to iterate over range keys. Its non-nilness cannot be used to determine 158 // if the Iterator is currently iterating over range keys: For that, consult 159 // the IterOptions using opts.rangeKeys(). If non-nil, its rangeKeyIter 160 // field is guaranteed to be non-nil too. 161 rangeKey *iteratorRangeKeyState 162 // rangeKeyMasking holds state for range-key masking of point keys. 163 rangeKeyMasking rangeKeyMasking 164 err error 165 // When iterValidityState=IterValid, key represents the current key, which 166 // is backed by keyBuf. 167 key []byte 168 keyBuf []byte 169 value []byte 170 valueBuf []byte 171 valueCloser io.Closer 172 // boundsBuf holds two buffers used to store the lower and upper bounds. 173 // Whenever the Iterator's bounds change, the new bounds are copied into 174 // boundsBuf[boundsBufIdx]. The two bounds share a slice to reduce 175 // allocations. opts.LowerBound and opts.UpperBound point into this slice. 176 boundsBuf [2][]byte 177 boundsBufIdx int 178 // iterKey, iterValue reflect the latest position of iter, except when 179 // SetBounds is called. In that case, these are explicitly set to nil. 180 iterKey *InternalKey 181 iterValue []byte 182 alloc *iterAlloc 183 getIterAlloc *getIterAlloc 184 prefixOrFullSeekKey []byte 185 readSampling readSampling 186 stats IteratorStats 187 externalReaders [][]*sstable.Reader 188 189 // Following fields used when constructing an iterator stack, eg, in Clone 190 // and SetOptions or when re-fragmenting a batch's range keys/range dels. 191 // Non-nil if this Iterator includes a Batch. 192 batch *Batch 193 newIters tableNewIters 194 newIterRangeKey keyspan.TableNewSpanIter 195 lazyCombinedIter lazyCombinedIter 196 seqNum uint64 197 // batchSeqNum is used by Iterators over indexed batches to detect when the 198 // underlying batch has been mutated. The batch beneath an indexed batch may 199 // be mutated while the Iterator is open, but new keys are not surfaced 200 // until the next call to SetOptions. 201 batchSeqNum uint64 202 // batch{PointIter,RangeDelIter,RangeKeyIter} are used when the Iterator is 203 // configured to read through an indexed batch. If a batch is set, these 204 // iterators will be included within the iterator stack regardless of 205 // whether the batch currently contains any keys of their kind. These 206 // pointers are used during a call to SetOptions to refresh the Iterator's 207 // view of its indexed batch. 208 batchPointIter batchIter 209 batchRangeDelIter keyspan.Iter 210 batchRangeKeyIter keyspan.Iter 211 212 // Keeping the bools here after all the 8 byte aligned fields shrinks the 213 // sizeof this struct by 24 bytes. 214 215 // INVARIANT: 216 // iterValidityState==IterAtLimit <=> 217 // pos==iterPosCurForwardPaused || pos==iterPosCurReversePaused 218 iterValidityState IterValidityState 219 // Set to true by SetBounds, SetOptions. Causes the Iterator to appear 220 // exhausted externally, while preserving the correct iterValidityState for 221 // the iterator's internal state. Preserving the correct internal validity 222 // is used for SeekPrefixGE(..., trySeekUsingNext), and SeekGE/SeekLT 223 // optimizations after "no-op" calls to SetBounds and SetOptions. 224 requiresReposition bool 225 // The position of iter. 
	// When this is iterPos{Prev,Next} the iter has been
	// moved past the current key-value, which can only happen if
	// iterValidityState=IterValid, i.e., there is something to return to the
	// client for the current position.
	pos iterPos
	// Relates to the prefixOrFullSeekKey field above.
	hasPrefix bool
	// Used for deriving the value of SeekPrefixGE(..., trySeekUsingNext),
	// and SeekGE/SeekLT optimizations.
	lastPositioningOp lastPositioningOpKind
	// Used for an optimization in external iterators to reduce the number of
	// merging levels.
	forwardOnly bool
	// closePointIterOnce is set to true if this point iter can only be Close()d
	// once, _and_ closing i.iter and then i.pointIter would close i.pointIter
	// twice. This is necessary to track if the point iter is an internal iterator
	// that could release its resources to a pool on Close(), making it harder for
	// that iterator to make its own closes idempotent.
	//
	// TODO(bilal): Update SetOptions to always close out point key iterators when
	// they won't be used, so that Close() doesn't need to default to closing
	// point iterators twice.
	closePointIterOnce bool
	// Used in some tests to disable the random disabling of seek optimizations.
	forceEnableSeekOpt bool
}

// cmp is a convenience shorthand for the i.comparer.Compare function.
func (i *Iterator) cmp(a, b []byte) int {
	return i.comparer.Compare(a, b)
}

// split is a convenience shorthand for the i.comparer.Split function.
func (i *Iterator) split(a []byte) int {
	return i.comparer.Split(a)
}

// equal is a convenience shorthand for the i.comparer.Equal function.
func (i *Iterator) equal(a, b []byte) bool {
	return i.comparer.Equal(a, b)
}

// iteratorRangeKeyState holds an iterator's range key iteration state.
type iteratorRangeKeyState struct {
	opts  *IterOptions
	cmp   base.Compare
	split base.Split
	// rangeKeyIter holds the range key iterator stack that iterates over the
	// merged spans across the entirety of the LSM.
	rangeKeyIter keyspan.FragmentIterator
	iiter        keyspan.InterleavingIter
	// stale is set to true when the range key state recorded here (in start,
	// end and keys) may not be in sync with the current range key at the
	// interleaving iterator's current position.
	//
	// When the interleaving iterator passes over a new span, it invokes the
	// SpanChanged hook defined on the `rangeKeyMasking` type, which sets stale
	// to true if the span is non-nil.
	//
	// The parent iterator may not be positioned over the interleaving
	// iterator's current position (eg, i.iterPos = iterPos{Next,Prev}), so
	// {keys,start,end} are only updated to the new range key during a call to
	// Iterator.saveRangeKey.
	stale bool
	// updated is used to signal to the Iterator client whether the state of
	// range keys has changed since the previous iterator position through the
	// `RangeKeyChanged` method. It's set to true during an Iterator positioning
	// operation that changes the state of the current range key. Each Iterator
	// positioning operation sets it back to false before executing.
	updated bool
	// prevPosHadRangeKey records whether the previous Iterator position had a
	// range key (HasPointAndRange() = (_, true)). It's updated at the beginning
	// of each new Iterator positioning operation. It's required by saveRangeKey
	// to set `updated` appropriately: Without this record of the previous iterator
	// state, it's ambiguous whether an iterator only temporarily stepped onto a
	// position without a range key.
	prevPosHadRangeKey bool
	// rangeKeyOnly is set to true if at the current iterator position there is
	// no point key, only a range key start boundary.
	rangeKeyOnly bool
	// hasRangeKey is true when the current iterator position has a covering
	// range key (eg, a range key with bounds [<lower>,<upper>) such that
	// <lower> ≤ Key() < <upper>).
	hasRangeKey bool
	// start and end are the [start, end) boundaries of the current range keys.
	start []byte
	end   []byte
	// keys is sorted by Suffix ascending.
	keys []RangeKeyData
	// buf is used to save range-key data before moving the range-key iterator.
	// Start and end boundaries, suffixes and values are all copied into buf.
	buf []byte

	// iterConfig holds fields that are used for the construction of the
	// iterator stack, but do not need to be directly accessed during iteration.
	// This struct is bundled within the iteratorRangeKeyState struct to reduce
	// allocations.
	iterConfig rangekey.UserIteratorConfig
}

func (i *iteratorRangeKeyState) init(cmp base.Compare, split base.Split, opts *IterOptions) {
	i.cmp = cmp
	i.split = split
	i.opts = opts
}

var iterRangeKeyStateAllocPool = sync.Pool{
	New: func() interface{} {
		return &iteratorRangeKeyState{}
	},
}

// isEphemeralPosition returns true iff the current iterator position is
// ephemeral, and won't be visited during subsequent relative positioning
// operations.
//
// The iterator position resulting from a SeekGE or SeekPrefixGE that lands on a
// straddling range key without a coincident point key is such a position.
func (i *Iterator) isEphemeralPosition() bool {
	return i.opts.rangeKeys() && i.rangeKey != nil && i.rangeKey.rangeKeyOnly &&
		!i.equal(i.rangeKey.start, i.key)
}

type lastPositioningOpKind int8

const (
	unknownLastPositionOp lastPositioningOpKind = iota
	seekPrefixGELastPositioningOp
	seekGELastPositioningOp
	seekLTLastPositioningOp
)

// Limited iteration mode. Not for use with prefix iteration.
//
// SeekGE, SeekLT, Prev, Next have WithLimit variants that pause the iterator
// at the limit in a best-effort manner. The client should behave correctly
// even if the limits are ignored. These limits are not "deep", in that they
// are not passed down to the underlying collection of internalIterators. This
// is because the limits are transient, and apply only until the next
// iteration call. They serve mainly as a way to bound the amount of work when
// two (or more) Iterators are being coordinated at a higher level.
//
// In limited iteration mode:
//   - Avoid using Iterator.Valid if the last call was to a *WithLimit() method.
//     The return value from the *WithLimit() method provides a more precise
//     disposition.
//   - The limit is exclusive for forward and inclusive for reverse.
//
// Limited iteration mode & range keys
//
// Limited iteration interacts with range-key iteration. When range key
// iteration is enabled, range keys are interleaved at their start boundaries.
378 // Limited iteration must ensure that if a range key exists within the limit, 379 // the iterator visits the range key. 380 // 381 // During forward limited iteration, this is trivial: An overlapping range key 382 // must have a start boundary less than the limit, and the range key's start 383 // boundary will be interleaved and found to be within the limit. 384 // 385 // During reverse limited iteration, the tail of the range key may fall within 386 // the limit. The range key must be surfaced even if the range key's start 387 // boundary is less than the limit, and if there are no point keys between the 388 // current iterator position and the limit. To provide this guarantee, reverse 389 // limited iteration ignores the limit as long as there is a range key 390 // overlapping the iteration position. 391 392 // IterValidityState captures the state of the Iterator. 393 type IterValidityState int8 394 395 const ( 396 // IterExhausted represents an Iterator that is exhausted. 397 IterExhausted IterValidityState = iota 398 // IterValid represents an Iterator that is valid. 399 IterValid 400 // IterAtLimit represents an Iterator that has a non-exhausted 401 // internalIterator, but has reached a limit without any key for the 402 // caller. 403 IterAtLimit 404 ) 405 406 // readSampling stores variables used to sample a read to trigger a read 407 // compaction 408 type readSampling struct { 409 bytesUntilReadSampling uint64 410 initialSamplePassed bool 411 pendingCompactions readCompactionQueue 412 // forceReadSampling is used for testing purposes to force a read sample on every 413 // call to Iterator.maybeSampleRead() 414 forceReadSampling bool 415 } 416 417 func (i *Iterator) findNextEntry(limit []byte) { 418 i.iterValidityState = IterExhausted 419 i.pos = iterPosCurForward 420 if i.opts.rangeKeys() && i.rangeKey != nil { 421 i.rangeKey.rangeKeyOnly = false 422 } 423 424 // Close the closer for the current value if one was open. 425 if i.closeValueCloser() != nil { 426 return 427 } 428 429 for i.iterKey != nil { 430 key := *i.iterKey 431 432 if i.hasPrefix { 433 if n := i.split(key.UserKey); !i.equal(i.prefixOrFullSeekKey, key.UserKey[:n]) { 434 return 435 } 436 } 437 // Compare with limit every time we start at a different user key. 438 // Note that given the best-effort contract of limit, we could avoid a 439 // comparison in the common case by doing this only after 440 // i.nextUserKey is called for the deletes below. However that makes 441 // the behavior non-deterministic (since the behavior will vary based 442 // on what has been compacted), which makes it hard to test with the 443 // metamorphic test. So we forego that performance optimization. 444 if limit != nil && i.cmp(limit, i.iterKey.UserKey) <= 0 { 445 i.iterValidityState = IterAtLimit 446 i.pos = iterPosCurForwardPaused 447 return 448 } 449 450 switch key.Kind() { 451 case InternalKeyKindRangeKeySet: 452 // Save the current key. 453 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 454 i.key = i.keyBuf 455 i.value = nil 456 // There may also be a live point key at this userkey that we have 457 // not yet read. We need to find the next entry with this user key 458 // to find it. Save the range key so we don't lose it when we Next 459 // the underlying iterator. 
460 i.saveRangeKey() 461 pointKeyExists := i.nextPointCurrentUserKey() 462 if i.err != nil { 463 i.iterValidityState = IterExhausted 464 return 465 } 466 i.rangeKey.rangeKeyOnly = !pointKeyExists 467 i.iterValidityState = IterValid 468 return 469 470 case InternalKeyKindDelete, InternalKeyKindSingleDelete: 471 i.nextUserKey() 472 continue 473 474 case InternalKeyKindSet, InternalKeyKindSetWithDelete: 475 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 476 i.key = i.keyBuf 477 i.value = i.iterValue 478 i.iterValidityState = IterValid 479 i.saveRangeKey() 480 return 481 482 case InternalKeyKindMerge: 483 // Resolving the merge may advance us to the next point key, which 484 // may be covered by a different set of range keys. Save the range 485 // key state so we don't lose it. 486 i.saveRangeKey() 487 if i.mergeForward(key) { 488 i.iterValidityState = IterValid 489 return 490 } 491 492 // The merge didn't yield a valid key, either because the value 493 // merger indicated it should be deleted, or because an error was 494 // encountered. 495 i.iterValidityState = IterExhausted 496 if i.err != nil { 497 return 498 } 499 if i.pos != iterPosNext { 500 i.nextUserKey() 501 } 502 if i.closeValueCloser() != nil { 503 return 504 } 505 i.pos = iterPosCurForward 506 507 default: 508 i.err = base.CorruptionErrorf("bitalostable: invalid internal key kind: %d", errors.Safe(key.Kind())) 509 i.iterValidityState = IterExhausted 510 return 511 } 512 } 513 } 514 515 func (i *Iterator) nextPointCurrentUserKey() bool { 516 i.pos = iterPosCurForward 517 518 i.iterKey, i.iterValue = i.iter.Next() 519 i.stats.ForwardStepCount[InternalIterCall]++ 520 if i.iterKey == nil || !i.equal(i.key, i.iterKey.UserKey) { 521 i.pos = iterPosNext 522 return false 523 } 524 525 key := *i.iterKey 526 switch key.Kind() { 527 case InternalKeyKindRangeKeySet: 528 // RangeKeySets must always be interleaved as the first internal key 529 // for a user key. 530 i.err = base.CorruptionErrorf("bitalostable: unexpected range key set mid-user key") 531 return false 532 533 case InternalKeyKindDelete, InternalKeyKindSingleDelete: 534 return false 535 536 case InternalKeyKindSet, InternalKeyKindSetWithDelete: 537 i.value = i.iterValue 538 return true 539 540 case InternalKeyKindMerge: 541 return i.mergeForward(key) 542 543 default: 544 i.err = base.CorruptionErrorf("bitalostable: invalid internal key kind: %d", errors.Safe(key.Kind())) 545 return false 546 } 547 } 548 549 // mergeForward resolves a MERGE key, advancing the underlying iterator forward 550 // to merge with subsequent keys with the same userkey. mergeForward returns a 551 // boolean indicating whether or not the merge yielded a valid key. A merge may 552 // not yield a valid key if an error occurred, in which case i.err is non-nil, 553 // or the user's value merger specified the key to be deleted. 554 // 555 // mergeForward does not update iterValidityState. 
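//
// As an illustrative sketch of what this resolution means for a caller
// (assuming pebble-style DB.Set/DB.Merge write methods and a Merger that
// concatenates operands in chronological order; both are assumptions here
// rather than guarantees made by this file):
//
//	_ = db.Set([]byte("k"), []byte("a"), nil)
//	_ = db.Merge([]byte("k"), []byte("b"), nil)
//	_ = db.Merge([]byte("k"), []byte("c"), nil)
//	iter := db.NewIter(nil)
//	if iter.SeekGE([]byte("k")) {
//		// Forward merge resolution folds the newest MERGE ("c"), the older
//		// MERGE ("b") and the base SET ("a") into one user-visible value,
//		// "abc" for a concatenating merger.
//		_ = iter.Value()
//	}
//	_ = iter.Close()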
556 func (i *Iterator) mergeForward(key base.InternalKey) (valid bool) { 557 var valueMerger ValueMerger 558 valueMerger, i.err = i.merge(key.UserKey, i.iterValue) 559 if i.err != nil { 560 return false 561 } 562 563 i.mergeNext(key, valueMerger) 564 if i.err != nil { 565 return false 566 } 567 568 var needDelete bool 569 i.value, needDelete, i.valueCloser, i.err = finishValueMerger( 570 valueMerger, true /* includesBase */) 571 if i.err != nil { 572 return false 573 } 574 if needDelete { 575 _ = i.closeValueCloser() 576 return false 577 } 578 return true 579 } 580 581 func (i *Iterator) closeValueCloser() error { 582 if i.valueCloser != nil { 583 i.err = i.valueCloser.Close() 584 i.valueCloser = nil 585 } 586 return i.err 587 } 588 589 func (i *Iterator) nextUserKey() { 590 if i.iterKey == nil { 591 return 592 } 593 trailer := i.iterKey.Trailer 594 done := i.iterKey.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer 595 if i.iterValidityState != IterValid { 596 i.keyBuf = append(i.keyBuf[:0], i.iterKey.UserKey...) 597 i.key = i.keyBuf 598 } 599 for { 600 i.iterKey, i.iterValue = i.iter.Next() 601 i.stats.ForwardStepCount[InternalIterCall]++ 602 // NB: We're guaranteed to be on the next user key if the previous key 603 // had a zero sequence number (`done`), or the new key has a trailer 604 // greater or equal to the previous key's trailer. This is true because 605 // internal keys with the same user key are sorted by Trailer in 606 // strictly monotonically descending order. We expect the trailer 607 // optimization to trigger around 50% of the time with randomly 608 // distributed writes. We expect it to trigger very frequently when 609 // iterating through ingested sstables, which contain keys that all have 610 // the same sequence number. 611 if done || i.iterKey == nil || i.iterKey.Trailer >= trailer { 612 break 613 } 614 if !i.equal(i.key, i.iterKey.UserKey) { 615 break 616 } 617 done = i.iterKey.Trailer <= base.InternalKeyZeroSeqnumMaxTrailer 618 trailer = i.iterKey.Trailer 619 } 620 } 621 622 func (i *Iterator) maybeSampleRead() { 623 // This method is only called when a public method of Iterator is 624 // returning, and below we exclude the case were the iterator is paused at 625 // a limit. The effect of these choices is that keys that are deleted, but 626 // are encountered during iteration, are not accounted for in the read 627 // sampling and will not cause read driven compactions, even though we are 628 // incurring cost in iterating over them. And this issue is not limited to 629 // Iterator, which does not see the effect of range deletes, which may be 630 // causing iteration work in mergingIter. It is not clear at this time 631 // whether this is a deficiency worth addressing. 632 if i.iterValidityState != IterValid { 633 return 634 } 635 if i.readState == nil { 636 return 637 } 638 if i.readSampling.forceReadSampling { 639 i.sampleRead() 640 return 641 } 642 samplingPeriod := int32(int64(readBytesPeriod) * i.readState.db.opts.Experimental.ReadSamplingMultiplier) 643 if samplingPeriod <= 0 { 644 return 645 } 646 bytesRead := uint64(len(i.key) + len(i.value)) 647 for i.readSampling.bytesUntilReadSampling < bytesRead { 648 i.readSampling.bytesUntilReadSampling += uint64(fastrand.Uint32n(2 * uint32(samplingPeriod))) 649 // The block below tries to adjust for the case where this is the 650 // first read in a newly-opened iterator. 
As bytesUntilReadSampling 651 // starts off at zero, we don't want to sample the first read of 652 // every newly-opened iterator, but we do want to sample some of them. 653 if !i.readSampling.initialSamplePassed { 654 i.readSampling.initialSamplePassed = true 655 if fastrand.Uint32n(uint32(i.readSampling.bytesUntilReadSampling)) > uint32(bytesRead) { 656 continue 657 } 658 } 659 i.sampleRead() 660 } 661 i.readSampling.bytesUntilReadSampling -= bytesRead 662 } 663 664 func (i *Iterator) sampleRead() { 665 var topFile *manifest.FileMetadata 666 topLevel, numOverlappingLevels := numLevels, 0 667 if mi, ok := i.iter.(*mergingIter); ok { 668 if len(mi.levels) > 1 { 669 mi.ForEachLevelIter(func(li *levelIter) bool { 670 l := manifest.LevelToInt(li.level) 671 if file := li.files.Current(); file != nil { 672 var containsKey bool 673 if i.pos == iterPosNext || i.pos == iterPosCurForward || 674 i.pos == iterPosCurForwardPaused { 675 containsKey = i.cmp(file.SmallestPointKey.UserKey, i.key) <= 0 676 } else if i.pos == iterPosPrev || i.pos == iterPosCurReverse || 677 i.pos == iterPosCurReversePaused { 678 containsKey = i.cmp(file.LargestPointKey.UserKey, i.key) >= 0 679 } 680 // Do nothing if the current key is not contained in file's 681 // bounds. We could seek the LevelIterator at this level 682 // to find the right file, but the performance impacts of 683 // doing that are significant enough to negate the benefits 684 // of read sampling in the first place. See the discussion 685 // at: 686 // https://github.com/zuoyebang/bitalostable/pull/1041#issuecomment-763226492 687 if containsKey { 688 numOverlappingLevels++ 689 if numOverlappingLevels >= 2 { 690 // Terminate the loop early if at least 2 overlapping levels are found. 691 return true 692 } 693 topLevel = l 694 topFile = file 695 } 696 } 697 return false 698 }) 699 } 700 } 701 if topFile == nil || topLevel >= numLevels { 702 return 703 } 704 if numOverlappingLevels >= 2 { 705 allowedSeeks := atomic.AddInt64(&topFile.Atomic.AllowedSeeks, -1) 706 if allowedSeeks == 0 { 707 708 // Since the compaction queue can handle duplicates, we can keep 709 // adding to the queue even once allowedSeeks hits 0. 710 // In fact, we NEED to keep adding to the queue, because the queue 711 // is small and evicts older and possibly useful compactions. 712 atomic.AddInt64(&topFile.Atomic.AllowedSeeks, topFile.InitAllowedSeeks) 713 714 read := readCompaction{ 715 start: topFile.SmallestPointKey.UserKey, 716 end: topFile.LargestPointKey.UserKey, 717 level: topLevel, 718 fileNum: topFile.FileNum, 719 } 720 i.readSampling.pendingCompactions.add(&read, i.cmp) 721 } 722 } 723 } 724 725 func (i *Iterator) findPrevEntry(limit []byte) { 726 i.iterValidityState = IterExhausted 727 i.pos = iterPosCurReverse 728 if i.opts.rangeKeys() && i.rangeKey != nil { 729 i.rangeKey.rangeKeyOnly = false 730 } 731 732 // Close the closer for the current value if one was open. 733 if i.valueCloser != nil { 734 i.err = i.valueCloser.Close() 735 i.valueCloser = nil 736 if i.err != nil { 737 i.iterValidityState = IterExhausted 738 return 739 } 740 } 741 742 var valueMerger ValueMerger 743 firstLoopIter := true 744 rangeKeyBoundary := false 745 // The code below compares with limit in multiple places. As documented in 746 // findNextEntry, this is being done to make the behavior of limit 747 // deterministic to allow for metamorphic testing. It is not required by 748 // the best-effort contract of limit. 
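//
// As a worked illustration of the loop below (not an exhaustive description):
// reverse iteration encounters the internal keys of a user key from oldest to
// newest, because the forward order within a user key is newest first. Given
// the internal keys, in forward order,
//
//	b.MERGE.8 b.MERGE.6 b.SET.4
//
// the loop first sees b.SET.4 and records its value, then folds in the newer
// b.MERGE.6 and b.MERGE.8 operands via MergeNewer, and only finishes the merge
// and returns the combined value for "b" once it has stepped to a different
// user key (or exhausted the iterator).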
749 for i.iterKey != nil { 750 key := *i.iterKey 751 752 // NB: We cannot pause if the current key is covered by a range key. 753 // Otherwise, the user might not ever learn of a range key that covers 754 // the key space being iterated over in which there are no point keys. 755 // Since limits are best effort, ignoring the limit in this case is 756 // allowed by the contract of limit. 757 if firstLoopIter && limit != nil && i.cmp(limit, i.iterKey.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) { 758 i.iterValidityState = IterAtLimit 759 i.pos = iterPosCurReversePaused 760 return 761 } 762 firstLoopIter = false 763 764 if i.iterValidityState == IterValid { 765 if !i.equal(key.UserKey, i.key) { 766 // We've iterated to the previous user key. 767 i.pos = iterPosPrev 768 if valueMerger != nil { 769 var needDelete bool 770 i.value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */) 771 if i.err == nil && needDelete { 772 // The point key at this key is deleted. If we also have 773 // a range key boundary at this key, we still want to 774 // return. Otherwise, we need to continue looking for 775 // a live key. 776 i.value = nil 777 if rangeKeyBoundary { 778 i.rangeKey.rangeKeyOnly = true 779 } else { 780 i.iterValidityState = IterExhausted 781 if i.closeValueCloser() == nil { 782 continue 783 } 784 } 785 } 786 } 787 if i.err != nil { 788 i.iterValidityState = IterExhausted 789 } 790 return 791 } 792 } 793 794 switch key.Kind() { 795 case InternalKeyKindRangeKeySet: 796 // Range key start boundary markers are interleaved with the maximum 797 // sequence number, so if there's a point key also at this key, we 798 // must've already iterated over it. 799 // This is the final entry at this user key, so we may return 800 i.rangeKey.rangeKeyOnly = i.iterValidityState != IterValid 801 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 802 i.key = i.keyBuf 803 i.iterValidityState = IterValid 804 i.saveRangeKey() 805 // In all other cases, previous iteration requires advancing to 806 // iterPosPrev in order to determine if the key is live and 807 // unshadowed by another key at the same user key. In this case, 808 // because range key start boundary markers are always interleaved 809 // at the maximum sequence number, we know that there aren't any 810 // additional keys with the same user key in the backward direction. 811 // 812 // We Prev the underlying iterator once anyways for consistency, so 813 // that we can maintain the invariant during backward iteration that 814 // i.iterPos = iterPosPrev. 815 i.stats.ReverseStepCount[InternalIterCall]++ 816 i.iterKey, i.iterValue = i.iter.Prev() 817 818 // Set rangeKeyBoundary so that on the next iteration, we know to 819 // return the key even if the MERGE point key is deleted. 820 rangeKeyBoundary = true 821 822 case InternalKeyKindDelete, InternalKeyKindSingleDelete: 823 i.value = nil 824 i.iterValidityState = IterExhausted 825 valueMerger = nil 826 i.iterKey, i.iterValue = i.iter.Prev() 827 i.stats.ReverseStepCount[InternalIterCall]++ 828 // Compare with the limit. We could optimize by only checking when 829 // we step to the previous user key, but detecting that requires a 830 // comparison too. Note that this position may already passed a 831 // number of versions of this user key, but they are all deleted, 832 // so the fact that a subsequent Prev*() call will not see them is 833 // harmless. 
Also note that this is the only place in the loop, 834 // other than the firstLoopIter case above, where we could step 835 // to a different user key and start processing it for returning 836 // to the caller. 837 if limit != nil && i.iterKey != nil && i.cmp(limit, i.iterKey.UserKey) > 0 && !i.rangeKeyWithinLimit(limit) { 838 i.iterValidityState = IterAtLimit 839 i.pos = iterPosCurReversePaused 840 return 841 } 842 continue 843 844 case InternalKeyKindSet, InternalKeyKindSetWithDelete: 845 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 846 i.key = i.keyBuf 847 // iterValue is owned by i.iter and could change after the Prev() 848 // call, so use valueBuf instead. Note that valueBuf is only used 849 // in this one instance; everywhere else (eg. in findNextEntry), 850 // we just point i.value to the unsafe i.iter-owned value buffer. 851 i.valueBuf = append(i.valueBuf[:0], i.iterValue...) 852 i.value = i.valueBuf 853 i.saveRangeKey() 854 i.iterValidityState = IterValid 855 i.iterKey, i.iterValue = i.iter.Prev() 856 i.stats.ReverseStepCount[InternalIterCall]++ 857 valueMerger = nil 858 continue 859 860 case InternalKeyKindMerge: 861 if i.iterValidityState == IterExhausted { 862 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 863 i.key = i.keyBuf 864 i.saveRangeKey() 865 valueMerger, i.err = i.merge(i.key, i.iterValue) 866 if i.err != nil { 867 return 868 } 869 i.iterValidityState = IterValid 870 } else if valueMerger == nil { 871 valueMerger, i.err = i.merge(i.key, i.value) 872 if i.err == nil { 873 i.err = valueMerger.MergeNewer(i.iterValue) 874 } 875 if i.err != nil { 876 i.iterValidityState = IterExhausted 877 return 878 } 879 } else { 880 i.err = valueMerger.MergeNewer(i.iterValue) 881 if i.err != nil { 882 i.iterValidityState = IterExhausted 883 return 884 } 885 } 886 i.iterKey, i.iterValue = i.iter.Prev() 887 i.stats.ReverseStepCount[InternalIterCall]++ 888 continue 889 890 default: 891 i.err = base.CorruptionErrorf("bitalostable: invalid internal key kind: %d", errors.Safe(key.Kind())) 892 i.iterValidityState = IterExhausted 893 return 894 } 895 } 896 897 // i.iterKey == nil, so broke out of the preceding loop. 898 if i.iterValidityState == IterValid { 899 i.pos = iterPosPrev 900 if valueMerger != nil { 901 var needDelete bool 902 i.value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, true /* includesBase */) 903 if i.err == nil && needDelete { 904 i.key = nil 905 i.value = nil 906 i.iterValidityState = IterExhausted 907 } 908 } 909 if i.err != nil { 910 i.iterValidityState = IterExhausted 911 } 912 } 913 } 914 915 func (i *Iterator) prevUserKey() { 916 if i.iterKey == nil { 917 return 918 } 919 if i.iterValidityState != IterValid { 920 // If we're going to compare against the prev key, we need to save the 921 // current key. 922 i.keyBuf = append(i.keyBuf[:0], i.iterKey.UserKey...) 923 i.key = i.keyBuf 924 } 925 for { 926 i.iterKey, i.iterValue = i.iter.Prev() 927 i.stats.ReverseStepCount[InternalIterCall]++ 928 if i.iterKey == nil { 929 break 930 } 931 if !i.equal(i.key, i.iterKey.UserKey) { 932 break 933 } 934 } 935 } 936 937 func (i *Iterator) mergeNext(key InternalKey, valueMerger ValueMerger) { 938 // Save the current key. 939 i.keyBuf = append(i.keyBuf[:0], key.UserKey...) 940 i.key = i.keyBuf 941 942 // Loop looking for older values for this key and merging them. 
943 for { 944 i.iterKey, i.iterValue = i.iter.Next() 945 i.stats.ForwardStepCount[InternalIterCall]++ 946 if i.iterKey == nil { 947 i.pos = iterPosNext 948 return 949 } 950 key = *i.iterKey 951 if !i.equal(i.key, key.UserKey) { 952 // We've advanced to the next key. 953 i.pos = iterPosNext 954 return 955 } 956 switch key.Kind() { 957 case InternalKeyKindDelete, InternalKeyKindSingleDelete: 958 // We've hit a deletion tombstone. Return everything up to this 959 // point. 960 return 961 962 case InternalKeyKindSet, InternalKeyKindSetWithDelete: 963 // We've hit a Set value. Merge with the existing value and return. 964 i.err = valueMerger.MergeOlder(i.iterValue) 965 return 966 967 case InternalKeyKindMerge: 968 // We've hit another Merge value. Merge with the existing value and 969 // continue looping. 970 i.err = valueMerger.MergeOlder(i.iterValue) 971 if i.err != nil { 972 return 973 } 974 continue 975 976 case InternalKeyKindRangeKeySet: 977 // The RANGEKEYSET marker must sort before a MERGE at the same user key. 978 i.err = base.CorruptionErrorf("bitalostable: out of order range key marker") 979 return 980 981 default: 982 i.err = base.CorruptionErrorf("bitalostable: invalid internal key kind: %d", errors.Safe(key.Kind())) 983 return 984 } 985 } 986 } 987 988 // SeekGE moves the iterator to the first key/value pair whose key is greater 989 // than or equal to the given key. Returns true if the iterator is pointing at 990 // a valid entry and false otherwise. 991 func (i *Iterator) SeekGE(key []byte) bool { 992 return i.SeekGEWithLimit(key, nil) == IterValid 993 } 994 995 // SeekGEWithLimit moves the iterator to the first key/value pair whose key is 996 // greater than or equal to the given key. 997 // 998 // If limit is provided, it serves as a best-effort exclusive limit. If the 999 // first key greater than or equal to the given search key is also greater than 1000 // or equal to limit, the Iterator may pause and return IterAtLimit. Because 1001 // limits are best-effort, SeekGEWithLimit may return a key beyond limit. 1002 // 1003 // If the Iterator is configured to iterate over range keys, SeekGEWithLimit 1004 // guarantees it will surface any range keys with bounds overlapping the 1005 // keyspace [key, limit). 1006 func (i *Iterator) SeekGEWithLimit(key []byte, limit []byte) IterValidityState { 1007 lastPositioningOp := i.lastPositioningOp 1008 // Set it to unknown, since this operation may not succeed, in which case 1009 // the SeekGE following this should not make any assumption about iterator 1010 // position. 1011 i.lastPositioningOp = unknownLastPositionOp 1012 i.requiresReposition = false 1013 i.err = nil 1014 i.hasPrefix = false 1015 i.stats.ForwardSeekCount[InterfaceCall]++ 1016 if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 { 1017 key = lowerBound 1018 } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 { 1019 key = upperBound 1020 } 1021 if i.rangeKey != nil { 1022 i.rangeKey.updated = false 1023 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid() 1024 } 1025 seekInternalIter := true 1026 var flags base.SeekGEFlags 1027 // The following noop optimization only applies when i.batch == nil, since 1028 // an iterator over a batch is iterating over mutable data, that may have 1029 // changed since the last seek. 
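//
// As a concrete illustration of the optimization guarded below (illustrative
// key names only): if the previous call was SeekGE([]byte("c")) and it left
// the iterator positioned at key "f", then a following SeekGE([]byte("d"))
// with no limit can return immediately, because "c" <= "d" <= "f" guarantees
// that the first key >= "d" is still "f". When the noop case doesn't apply,
// the seek below still runs, possibly with the TrySeekUsingNext hint when the
// new seek key is strictly larger than the previous one.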
1030 if lastPositioningOp == seekGELastPositioningOp && i.batch == nil { 1031 cmp := i.cmp(i.prefixOrFullSeekKey, key) 1032 // If this seek is to the same or later key, and the iterator is 1033 // already positioned there, this is a noop. This can be helpful for 1034 // sparse key spaces that have many deleted keys, where one can avoid 1035 // the overhead of iterating past them again and again. 1036 if cmp <= 0 { 1037 if i.iterValidityState == IterExhausted || 1038 (i.iterValidityState == IterValid && i.cmp(key, i.key) <= 0 && 1039 (limit == nil || i.cmp(i.key, limit) < 0)) { 1040 // Noop 1041 if !invariants.Enabled || !disableSeekOpt(key, uintptr(unsafe.Pointer(i))) || i.forceEnableSeekOpt { 1042 i.lastPositioningOp = seekGELastPositioningOp 1043 return i.iterValidityState 1044 } 1045 } 1046 // cmp == 0 is not safe to optimize since 1047 // - i.pos could be at iterPosNext, due to a merge. 1048 // - Even if i.pos were at iterPosCurForward, we could have a DELETE, 1049 // SET pair for a key, and the iterator would have moved past DELETE 1050 // but stayed at iterPosCurForward. A similar situation occurs for a 1051 // MERGE, SET pair where the MERGE is consumed and the iterator is 1052 // at the SET. 1053 // We also leverage the IterAtLimit <=> i.pos invariant defined in the 1054 // comment on iterValidityState, to exclude any cases where i.pos 1055 // is iterPosCur{Forward,Reverse}Paused. This avoids the need to 1056 // special-case those iterator positions and their interactions with 1057 // TrySeekUsingNext, as the main uses for TrySeekUsingNext in CockroachDB 1058 // do not use limited Seeks in the first place. 1059 if cmp < 0 && i.iterValidityState != IterAtLimit && limit == nil { 1060 flags = flags.EnableTrySeekUsingNext() 1061 } 1062 if invariants.Enabled && flags.TrySeekUsingNext() && !i.forceEnableSeekOpt && disableSeekOpt(key, uintptr(unsafe.Pointer(i))) { 1063 flags = flags.DisableTrySeekUsingNext() 1064 } 1065 if i.pos == iterPosCurForwardPaused && i.cmp(key, i.iterKey.UserKey) <= 0 { 1066 // Have some work to do, but don't need to seek, and we can 1067 // start doing findNextEntry from i.iterKey. 1068 seekInternalIter = false 1069 } 1070 } 1071 } 1072 if seekInternalIter { 1073 i.iterKey, i.iterValue = i.iter.SeekGE(key, flags) 1074 i.stats.ForwardSeekCount[InternalIterCall]++ 1075 } 1076 i.findNextEntry(limit) 1077 i.maybeSampleRead() 1078 if i.Error() == nil && i.batch == nil { 1079 // Prepare state for a future noop optimization. 1080 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...) 1081 i.lastPositioningOp = seekGELastPositioningOp 1082 } 1083 return i.iterValidityState 1084 } 1085 1086 // SeekPrefixGE moves the iterator to the first key/value pair whose key is 1087 // greater than or equal to the given key and which has the same "prefix" as 1088 // the given key. The prefix for a key is determined by the user-defined 1089 // Comparer.Split function. The iterator will not observe keys not matching the 1090 // "prefix" of the search key. Calling SeekPrefixGE puts the iterator in prefix 1091 // iteration mode. The iterator remains in prefix iteration until a subsequent 1092 // call to another absolute positioning method (SeekGE, SeekLT, First, 1093 // Last). Reverse iteration (Prev) is not supported when an iterator is in 1094 // prefix iteration mode. Returns true if the iterator is pointing at a valid 1095 // entry and false otherwise. 
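//
// As an illustrative sketch of entering and leaving prefix iteration mode
// (iter is an *Iterator; the split-on-@ comparer described below is assumed):
//
//	if iter.SeekPrefixGE([]byte("a@0")) {
//		_ = iter.Prev()  // reverse iteration is rejected in prefix mode
//		_ = iter.Error() // reports the reverse-prefix-iteration error
//	}
//	iter.First() // any other absolute positioning call leaves prefix mode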
1096 // 1097 // The semantics of SeekPrefixGE are slightly unusual and designed for 1098 // iteration to be able to take advantage of bloom filters that have been 1099 // created on the "prefix". If you're not using bloom filters, there is no 1100 // reason to use SeekPrefixGE. 1101 // 1102 // An example Split function may separate a timestamp suffix from the prefix of 1103 // the key. 1104 // 1105 // Split(<key>@<timestamp>) -> <key> 1106 // 1107 // Consider the keys "a@1", "a@2", "aa@3", "aa@4". The prefixes for these keys 1108 // are "a", and "aa". Note that despite "a" and "aa" sharing a prefix by the 1109 // usual definition, those prefixes differ by the definition of the Split 1110 // function. To see how this works, consider the following set of calls on this 1111 // data set: 1112 // 1113 // SeekPrefixGE("a@0") -> "a@1" 1114 // Next() -> "a@2" 1115 // Next() -> EOF 1116 // 1117 // If you're just looking to iterate over keys with a shared prefix, as 1118 // defined by the configured comparer, set iterator bounds instead: 1119 // 1120 // iter := db.NewIter(&bitalostable.IterOptions{ 1121 // LowerBound: []byte("prefix"), 1122 // UpperBound: []byte("prefiy"), 1123 // }) 1124 // for iter.First(); iter.Valid(); iter.Next() { 1125 // // Only keys beginning with "prefix" will be visited. 1126 // } 1127 // 1128 // See ExampleIterator_SeekPrefixGE for a working example. 1129 // 1130 // When iterating with range keys enabled, all range keys encountered are 1131 // truncated to the seek key's prefix's bounds. The truncation of the upper 1132 // bound requires that the database's Comparer is configured with a 1133 // ImmediateSuccessor method. For example, a SeekPrefixGE("a@9") call with the 1134 // prefix "a" will truncate range key bounds to [a,ImmediateSuccessor(a)]. 1135 func (i *Iterator) SeekPrefixGE(key []byte) bool { 1136 lastPositioningOp := i.lastPositioningOp 1137 // Set it to unknown, since this operation may not succeed, in which case 1138 // the SeekPrefixGE following this should not make any assumption about 1139 // iterator position. 1140 i.lastPositioningOp = unknownLastPositionOp 1141 i.requiresReposition = false 1142 i.err = nil 1143 i.stats.ForwardSeekCount[InterfaceCall]++ 1144 if i.rangeKey != nil { 1145 i.rangeKey.updated = false 1146 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid() 1147 } 1148 if i.comparer.Split == nil { 1149 panic("bitalostable: split must be provided for SeekPrefixGE") 1150 } 1151 if i.comparer.ImmediateSuccessor == nil && i.opts.KeyTypes != IterKeyTypePointsOnly { 1152 panic("bitalostable: ImmediateSuccessor must be provided for SeekPrefixGE with range keys") 1153 } 1154 prefixLen := i.split(key) 1155 keyPrefix := key[:prefixLen] 1156 var flags base.SeekGEFlags 1157 if lastPositioningOp == seekPrefixGELastPositioningOp { 1158 if !i.hasPrefix { 1159 panic("lastPositioningOpsIsSeekPrefixGE is true, but hasPrefix is false") 1160 } 1161 // The iterator has not been repositioned after the last SeekPrefixGE. 1162 // See if we are seeking to a larger key, since then we can optimize 1163 // the seek by using next. Note that we could also optimize if Next 1164 // has been called, if the iterator is not exhausted and the current 1165 // position is <= the seek key. We are keeping this limited for now 1166 // since such optimizations require care for correctness, and to not 1167 // become de-optimizations (if one usually has to do all the next 1168 // calls and then the seek). 
This SeekPrefixGE optimization 1169 // specifically benefits CockroachDB. 1170 cmp := i.cmp(i.prefixOrFullSeekKey, keyPrefix) 1171 // cmp == 0 is not safe to optimize since 1172 // - i.pos could be at iterPosNext, due to a merge. 1173 // - Even if i.pos were at iterPosCurForward, we could have a DELETE, 1174 // SET pair for a key, and the iterator would have moved past DELETE 1175 // but stayed at iterPosCurForward. A similar situation occurs for a 1176 // MERGE, SET pair where the MERGE is consumed and the iterator is 1177 // at the SET. 1178 // In general some versions of i.prefix could have been consumed by 1179 // the iterator, so we only optimize for cmp < 0. 1180 if cmp < 0 { 1181 flags = flags.EnableTrySeekUsingNext() 1182 } 1183 if invariants.Enabled && flags.TrySeekUsingNext() && !i.forceEnableSeekOpt && disableSeekOpt(key, uintptr(unsafe.Pointer(i))) { 1184 flags = flags.DisableTrySeekUsingNext() 1185 } 1186 } 1187 // Make a copy of the prefix so that modifications to the key after 1188 // SeekPrefixGE returns does not affect the stored prefix. 1189 if cap(i.prefixOrFullSeekKey) < prefixLen { 1190 i.prefixOrFullSeekKey = make([]byte, prefixLen) 1191 } else { 1192 i.prefixOrFullSeekKey = i.prefixOrFullSeekKey[:prefixLen] 1193 } 1194 i.hasPrefix = true 1195 copy(i.prefixOrFullSeekKey, keyPrefix) 1196 1197 if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 { 1198 if n := i.split(lowerBound); !bytes.Equal(i.prefixOrFullSeekKey, lowerBound[:n]) { 1199 i.err = errors.New("bitalostable: SeekPrefixGE supplied with key outside of lower bound") 1200 i.iterValidityState = IterExhausted 1201 return false 1202 } 1203 key = lowerBound 1204 } else if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 { 1205 if n := i.split(upperBound); !bytes.Equal(i.prefixOrFullSeekKey, upperBound[:n]) { 1206 i.err = errors.New("bitalostable: SeekPrefixGE supplied with key outside of upper bound") 1207 i.iterValidityState = IterExhausted 1208 return false 1209 } 1210 key = upperBound 1211 } 1212 i.iterKey, i.iterValue = i.iter.SeekPrefixGE(i.prefixOrFullSeekKey, key, flags) 1213 i.stats.ForwardSeekCount[InternalIterCall]++ 1214 i.findNextEntry(nil) 1215 i.maybeSampleRead() 1216 if i.Error() == nil { 1217 i.lastPositioningOp = seekPrefixGELastPositioningOp 1218 } 1219 return i.iterValidityState == IterValid 1220 } 1221 1222 // Deterministic disabling of the seek optimization. It uses the iterator 1223 // pointer, since we want diversity in iterator behavior for the same key. 1224 // Used for tests. 1225 func disableSeekOpt(key []byte, ptr uintptr) bool { 1226 // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ 1227 simpleHash := (11400714819323198485 * uint64(ptr)) >> 63 1228 return key != nil && key[0]&byte(1) == 0 && simpleHash == 0 1229 } 1230 1231 // SeekLT moves the iterator to the last key/value pair whose key is less than 1232 // the given key. Returns true if the iterator is pointing at a valid entry and 1233 // false otherwise. 1234 func (i *Iterator) SeekLT(key []byte) bool { 1235 return i.SeekLTWithLimit(key, nil) == IterValid 1236 } 1237 1238 // SeekLTWithLimit moves the iterator to the last key/value pair whose key is 1239 // less than the given key. 1240 // 1241 // If limit is provided, it serves as a best-effort inclusive limit. 
If the last 1242 // key less than the given search key is also less than limit, the Iterator may 1243 // pause and return IterAtLimit. Because limits are best-effort, SeekLTWithLimit 1244 // may return a key beyond limit. 1245 // 1246 // If the Iterator is configured to iterate over range keys, SeekLTWithLimit 1247 // guarantees it will surface any range keys with bounds overlapping the 1248 // keyspace up to limit. 1249 func (i *Iterator) SeekLTWithLimit(key []byte, limit []byte) IterValidityState { 1250 lastPositioningOp := i.lastPositioningOp 1251 // Set it to unknown, since this operation may not succeed, in which case 1252 // the SeekLT following this should not make any assumption about iterator 1253 // position. 1254 i.lastPositioningOp = unknownLastPositionOp 1255 i.requiresReposition = false 1256 i.err = nil 1257 i.stats.ReverseSeekCount[InterfaceCall]++ 1258 if upperBound := i.opts.GetUpperBound(); upperBound != nil && i.cmp(key, upperBound) > 0 { 1259 key = upperBound 1260 } else if lowerBound := i.opts.GetLowerBound(); lowerBound != nil && i.cmp(key, lowerBound) < 0 { 1261 key = lowerBound 1262 } 1263 if i.rangeKey != nil { 1264 i.rangeKey.updated = false 1265 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid() 1266 } 1267 i.hasPrefix = false 1268 seekInternalIter := true 1269 // The following noop optimization only applies when i.batch == nil, since 1270 // an iterator over a batch is iterating over mutable data, that may have 1271 // changed since the last seek. 1272 if lastPositioningOp == seekLTLastPositioningOp && i.batch == nil { 1273 cmp := i.cmp(key, i.prefixOrFullSeekKey) 1274 // If this seek is to the same or earlier key, and the iterator is 1275 // already positioned there, this is a noop. This can be helpful for 1276 // sparse key spaces that have many deleted keys, where one can avoid 1277 // the overhead of iterating past them again and again. 1278 if cmp <= 0 { 1279 // NB: when pos != iterPosCurReversePaused, the invariant 1280 // documented earlier implies that iterValidityState != 1281 // IterAtLimit. 1282 if i.iterValidityState == IterExhausted || 1283 (i.iterValidityState == IterValid && i.cmp(i.key, key) < 0 && 1284 (limit == nil || i.cmp(limit, i.key) <= 0)) { 1285 if !invariants.Enabled || !disableSeekOpt(key, uintptr(unsafe.Pointer(i))) { 1286 i.lastPositioningOp = seekLTLastPositioningOp 1287 return i.iterValidityState 1288 } 1289 } 1290 if i.pos == iterPosCurReversePaused && i.cmp(i.iterKey.UserKey, key) < 0 { 1291 // Have some work to do, but don't need to seek, and we can 1292 // start doing findPrevEntry from i.iterKey. 1293 seekInternalIter = false 1294 } 1295 } 1296 } 1297 if seekInternalIter { 1298 i.iterKey, i.iterValue = i.iter.SeekLT(key, base.SeekLTFlagsNone) 1299 i.stats.ReverseSeekCount[InternalIterCall]++ 1300 } 1301 i.findPrevEntry(limit) 1302 i.maybeSampleRead() 1303 if i.Error() == nil && i.batch == nil { 1304 // Prepare state for a future noop optimization. 1305 i.prefixOrFullSeekKey = append(i.prefixOrFullSeekKey[:0], key...) 1306 i.lastPositioningOp = seekLTLastPositioningOp 1307 } 1308 return i.iterValidityState 1309 } 1310 1311 // First moves the iterator the the first key/value pair. Returns true if the 1312 // iterator is pointing at a valid entry and false otherwise. 
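//
// As a usage sketch of the basic forward scan (illustrative; assumes a *DB
// named db opened elsewhere, and the fmt package):
//
//	iter := db.NewIter(nil)
//	for valid := iter.First(); valid; valid = iter.Next() {
//		fmt.Printf("%s => %s\n", iter.Key(), iter.Value())
//	}
//	if err := iter.Error(); err != nil {
//		// handle any error accumulated during iteration
//	}
//	if err := iter.Close(); err != nil {
//		// handle the error from closing the iterator
//	}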
func (i *Iterator) First() bool {
	i.err = nil
	i.hasPrefix = false
	i.lastPositioningOp = unknownLastPositionOp
	i.requiresReposition = false
	i.stats.ForwardSeekCount[InterfaceCall]++
	if i.rangeKey != nil {
		i.rangeKey.updated = false
		i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
	}

	if lowerBound := i.opts.GetLowerBound(); lowerBound != nil {
		i.iterKey, i.iterValue = i.iter.SeekGE(lowerBound, base.SeekGEFlagsNone)
		i.stats.ForwardSeekCount[InternalIterCall]++
	} else {
		i.iterKey, i.iterValue = i.iter.First()
		i.stats.ForwardSeekCount[InternalIterCall]++
	}
	i.findNextEntry(nil)
	i.maybeSampleRead()
	return i.iterValidityState == IterValid
}

// Last moves the iterator to the last key/value pair. Returns true if the
// iterator is pointing at a valid entry and false otherwise.
func (i *Iterator) Last() bool {
	i.err = nil
	i.hasPrefix = false
	i.lastPositioningOp = unknownLastPositionOp
	i.requiresReposition = false
	i.stats.ReverseSeekCount[InterfaceCall]++
	if i.rangeKey != nil {
		i.rangeKey.updated = false
		i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid()
	}

	if upperBound := i.opts.GetUpperBound(); upperBound != nil {
		i.iterKey, i.iterValue = i.iter.SeekLT(upperBound, base.SeekLTFlagsNone)
		i.stats.ReverseSeekCount[InternalIterCall]++
	} else {
		i.iterKey, i.iterValue = i.iter.Last()
		i.stats.ReverseSeekCount[InternalIterCall]++
	}
	i.findPrevEntry(nil)
	i.maybeSampleRead()
	return i.iterValidityState == IterValid
}

// Next moves the iterator to the next key/value pair. Returns true if the
// iterator is pointing at a valid entry and false otherwise.
func (i *Iterator) Next() bool {
	return i.NextWithLimit(nil) == IterValid
}

// NextWithLimit moves the iterator to the next key/value pair.
//
// If limit is provided, it serves as a best-effort exclusive limit. If the next
// key is greater than or equal to limit, the Iterator may pause and return
// IterAtLimit. Because limits are best-effort, NextWithLimit may return a key
// beyond limit.
//
// If the Iterator is configured to iterate over range keys, NextWithLimit
// guarantees it will surface any range keys with bounds overlapping the
// keyspace up to limit.
func (i *Iterator) NextWithLimit(limit []byte) IterValidityState {
	i.stats.ForwardStepCount[InterfaceCall]++
	if i.hasPrefix {
		if limit != nil {
			i.err = errors.New("cannot use limit with prefix iteration")
			i.iterValidityState = IterExhausted
			return i.iterValidityState
		} else if i.iterValidityState == IterExhausted {
			// No-op, already exhausted. We avoid executing the Next because it
			// can break invariants: Specifically, a file that fails the bloom
			// filter test may result in its level being removed from the
			// merging iterator. The level's removal can cause a lazy combined
			// iterator to miss range keys and trigger a switch to combined
			// iteration at a larger key, breaking keyspan invariants.
1391 return i.iterValidityState 1392 } 1393 } 1394 if i.err != nil { 1395 return i.iterValidityState 1396 } 1397 i.lastPositioningOp = unknownLastPositionOp 1398 i.requiresReposition = false 1399 if i.rangeKey != nil { 1400 i.rangeKey.updated = false 1401 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid() 1402 } 1403 switch i.pos { 1404 case iterPosCurForward: 1405 i.nextUserKey() 1406 case iterPosCurForwardPaused: 1407 // Already at the right place. 1408 case iterPosCurReverse: 1409 // Switching directions. 1410 // Unless the iterator was exhausted, reverse iteration needs to 1411 // position the iterator at iterPosPrev. 1412 if i.iterKey != nil { 1413 i.err = errors.New("switching from reverse to forward but iter is not at prev") 1414 i.iterValidityState = IterExhausted 1415 return i.iterValidityState 1416 } 1417 // We're positioned before the first key. Need to reposition to point to 1418 // the first key. 1419 if lowerBound := i.opts.GetLowerBound(); lowerBound != nil { 1420 i.iterKey, i.iterValue = i.iter.SeekGE(lowerBound, base.SeekGEFlagsNone) 1421 i.stats.ForwardSeekCount[InternalIterCall]++ 1422 } else { 1423 i.iterKey, i.iterValue = i.iter.First() 1424 i.stats.ForwardSeekCount[InternalIterCall]++ 1425 } 1426 case iterPosCurReversePaused: 1427 // Switching directions. 1428 // The iterator must not be exhausted since it paused. 1429 if i.iterKey == nil { 1430 i.err = errors.New("switching paused from reverse to forward but iter is exhausted") 1431 i.iterValidityState = IterExhausted 1432 return i.iterValidityState 1433 } 1434 i.nextUserKey() 1435 case iterPosPrev: 1436 // The underlying iterator is pointed to the previous key (this can 1437 // only happen when switching iteration directions). We set 1438 // i.iterValidityState to IterExhausted here to force the calls to 1439 // nextUserKey to save the current key i.iter is pointing at in order 1440 // to determine when the next user-key is reached. 1441 i.iterValidityState = IterExhausted 1442 if i.iterKey == nil { 1443 // We're positioned before the first key. Need to reposition to point to 1444 // the first key. 1445 if lowerBound := i.opts.GetLowerBound(); lowerBound != nil { 1446 i.iterKey, i.iterValue = i.iter.SeekGE(lowerBound, base.SeekGEFlagsNone) 1447 i.stats.ForwardSeekCount[InternalIterCall]++ 1448 } else { 1449 i.iterKey, i.iterValue = i.iter.First() 1450 i.stats.ForwardSeekCount[InternalIterCall]++ 1451 } 1452 } else { 1453 i.nextUserKey() 1454 } 1455 i.nextUserKey() 1456 case iterPosNext: 1457 // Already at the right place. 1458 } 1459 i.findNextEntry(limit) 1460 i.maybeSampleRead() 1461 return i.iterValidityState 1462 } 1463 1464 // Prev moves the iterator to the previous key/value pair. Returns true if the 1465 // iterator is pointing at a valid entry and false otherwise. 1466 func (i *Iterator) Prev() bool { 1467 return i.PrevWithLimit(nil) == IterValid 1468 } 1469 1470 // PrevWithLimit moves the iterator to the previous key/value pair. 1471 // 1472 // If limit is provided, it serves as a best-effort inclusive limit. If the 1473 // previous key is less than limit, the Iterator may pause and return 1474 // IterAtLimit. Because limits are best-effort, PrevWithLimit may return a key 1475 // beyond limit. 1476 // 1477 // If the Iterator is configured to iterate over range keys, PrevWithLimit 1478 // guarantees it will surface any range keys with bounds overlapping the 1479 // keyspace up to limit. 
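//
// A usage sketch of limited reverse iteration (assuming `it` is an open
// Iterator, `end` is the exclusive seek key and `limit` is the inclusive
// lower limit):
//
//	validity := it.SeekLTWithLimit(end, limit)
//	for validity == IterValid {
//		_ = it.Key()
//		validity = it.PrevWithLimit(limit)
//	}
//	if validity == IterAtLimit {
//		// Paused at the limit; iteration may be resumed, for example with
//		// another PrevWithLimit call using a smaller (or nil) limit.
//	}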
1480 func (i *Iterator) PrevWithLimit(limit []byte) IterValidityState { 1481 i.stats.ReverseStepCount[InterfaceCall]++ 1482 if i.err != nil { 1483 return i.iterValidityState 1484 } 1485 i.lastPositioningOp = unknownLastPositionOp 1486 i.requiresReposition = false 1487 if i.rangeKey != nil { 1488 i.rangeKey.updated = false 1489 i.rangeKey.prevPosHadRangeKey = i.rangeKey.hasRangeKey && i.Valid() 1490 } 1491 if i.hasPrefix { 1492 i.err = errReversePrefixIteration 1493 i.iterValidityState = IterExhausted 1494 return i.iterValidityState 1495 } 1496 switch i.pos { 1497 case iterPosCurForward: 1498 // Switching directions, and will handle this below. 1499 case iterPosCurForwardPaused: 1500 // Switching directions, and will handle this below. 1501 case iterPosCurReverse: 1502 i.prevUserKey() 1503 case iterPosCurReversePaused: 1504 // Already at the right place. 1505 case iterPosNext: 1506 // The underlying iterator is pointed to the next key (this can only happen 1507 // when switching iteration directions). We will handle this below. 1508 case iterPosPrev: 1509 // Already at the right place. 1510 } 1511 if i.pos == iterPosCurForward || i.pos == iterPosNext || i.pos == iterPosCurForwardPaused { 1512 // Switching direction. 1513 stepAgain := i.pos == iterPosNext 1514 1515 // Synthetic range key markers are a special case. Consider SeekGE(b) 1516 // which finds a range key [a, c). To ensure the user observes the range 1517 // key, the Iterator pauses at Key() = b. The iterator must advance the 1518 // internal iterator to see if there's also a coincident point key at 1519 // 'b', leaving the iterator at iterPosNext if there's not. 1520 // 1521 // This is a problem: Synthetic range key markers are only interleaved 1522 // during the original seek. A subsequent Prev() of i.iter will not move 1523 // back onto the synthetic range key marker. In this case where the 1524 // previous iterator position was a synthetic range key start boundary, 1525 // we must not step a second time. 1526 if i.isEphemeralPosition() { 1527 stepAgain = false 1528 } 1529 1530 // We set i.iterValidityState to IterExhausted here to force the calls 1531 // to prevUserKey to save the current key i.iter is pointing at in 1532 // order to determine when the prev user-key is reached. 1533 i.iterValidityState = IterExhausted 1534 if i.iterKey == nil { 1535 // We're positioned after the last key. Need to reposition to point to 1536 // the last key. 1537 if upperBound := i.opts.GetUpperBound(); upperBound != nil { 1538 i.iterKey, i.iterValue = i.iter.SeekLT(upperBound, base.SeekLTFlagsNone) 1539 i.stats.ReverseSeekCount[InternalIterCall]++ 1540 } else { 1541 i.iterKey, i.iterValue = i.iter.Last() 1542 i.stats.ReverseSeekCount[InternalIterCall]++ 1543 } 1544 } else { 1545 i.prevUserKey() 1546 } 1547 if stepAgain { 1548 i.prevUserKey() 1549 } 1550 } 1551 i.findPrevEntry(limit) 1552 i.maybeSampleRead() 1553 return i.iterValidityState 1554 } 1555 1556 // RangeKeyData describes a range key's data, set through RangeKeySet. The key 1557 // boundaries of the range key are provided by Iterator.RangeBounds. 1558 type RangeKeyData struct { 1559 Suffix []byte 1560 Value []byte 1561 } 1562 1563 // rangeKeyWithinLimit is called during limited reverse iteration when 1564 // positioned over a key beyond the limit. If there exists a range key that lies 1565 // within the limit, the iterator must not pause in order to ensure the user has 1566 // an opportunity to observe the range key within limit.
1567 // 1568 // It would be valid to ignore the limit whenever there's a range key covering 1569 // the key, but that would introduce nondeterminism. To preserve determinism for 1570 // testing, the iterator ignores the limit only if the covering range key does 1571 // cover the keyspace within the limit. 1572 // 1573 // This awkwardness exists because range keys are interleaved at their inclusive 1574 // start positions. Note that limit is inclusive. 1575 func (i *Iterator) rangeKeyWithinLimit(limit []byte) bool { 1576 if i.rangeKey == nil || !i.opts.rangeKeys() { 1577 return false 1578 } 1579 s := i.rangeKey.iiter.Span() 1580 // If the range key ends beyond the limit, then the range key does not cover 1581 // any portion of the keyspace within the limit and it is safe to pause. 1582 return s != nil && i.cmp(s.End, limit) > 0 1583 } 1584 1585 // saveRangeKey saves the current range key to the underlying iterator's current 1586 // range key state. If the range key has not changed, saveRangeKey is a no-op. 1587 // If there is a new range key, saveRangeKey copies all of the key, value and 1588 // suffixes into Iterator-managed buffers. 1589 func (i *Iterator) saveRangeKey() { 1590 if i.rangeKey == nil || i.opts.KeyTypes == IterKeyTypePointsOnly { 1591 return 1592 } 1593 1594 s := i.rangeKey.iiter.Span() 1595 if s == nil { 1596 i.rangeKey.hasRangeKey = false 1597 i.rangeKey.updated = i.rangeKey.prevPosHadRangeKey 1598 return 1599 } else if !i.rangeKey.stale { 1600 // The range key `s` is identical to the one currently saved. No-op. 1601 return 1602 } 1603 1604 if s.KeysOrder != keyspan.BySuffixAsc { 1605 panic("bitalostable: range key span's keys unexpectedly not in ascending suffix order") 1606 } 1607 1608 // Although `i.rangeKey.stale` is true, the span s may still be identical 1609 // to the currently saved span. This is possible when seeking the iterator, 1610 // which may land back on the same range key. If we previously had a range 1611 // key and the new one has an identical start key, then it must be the same 1612 // range key and we can avoid copying and keep `i.rangeKey.updated=false`. 1613 // 1614 // TODO(jackson): These key comparisons could be avoidable during relative 1615 // positioning operations continuing in the same direction, because these 1616 // ops will never encounter the previous position's range key while 1617 // stale=true. However, threading whether the current op is a seek or step 1618 // maybe isn't worth it. This key comparison is only necessary once when we 1619 // step onto a new range key, which should be relatively rare. 1620 if i.rangeKey.prevPosHadRangeKey && i.equal(i.rangeKey.start, s.Start) && 1621 i.equal(i.rangeKey.end, s.End) { 1622 i.rangeKey.updated = false 1623 i.rangeKey.stale = false 1624 i.rangeKey.hasRangeKey = true 1625 return 1626 } 1627 1628 i.rangeKey.hasRangeKey = true 1629 i.rangeKey.updated = true 1630 i.rangeKey.stale = false 1631 i.rangeKey.buf = append(i.rangeKey.buf[:0], s.Start...) 1632 i.rangeKey.start = i.rangeKey.buf 1633 i.rangeKey.buf = append(i.rangeKey.buf, s.End...) 
1634 i.rangeKey.end = i.rangeKey.buf[len(i.rangeKey.buf)-len(s.End):] 1635 i.rangeKey.keys = i.rangeKey.keys[:0] 1636 for j := 0; j < len(s.Keys); j++ { 1637 if invariants.Enabled { 1638 if s.Keys[j].Kind() != base.InternalKeyKindRangeKeySet { 1639 panic("bitalostable: user iteration encountered non-RangeKeySet key kind") 1640 } else if j > 0 && i.cmp(s.Keys[j].Suffix, s.Keys[j-1].Suffix) < 0 { 1641 panic("bitalostable: user iteration encountered range keys not in suffix order") 1642 } 1643 } 1644 i.rangeKey.buf = append(i.rangeKey.buf, s.Keys[j].Suffix...) 1645 suffix := i.rangeKey.buf[len(i.rangeKey.buf)-len(s.Keys[j].Suffix):] 1646 i.rangeKey.buf = append(i.rangeKey.buf, s.Keys[j].Value...) 1647 value := i.rangeKey.buf[len(i.rangeKey.buf)-len(s.Keys[j].Value):] 1648 i.rangeKey.keys = append(i.rangeKey.keys, RangeKeyData{ 1649 Suffix: suffix, 1650 Value: value, 1651 }) 1652 } 1653 } 1654 1655 // RangeKeyChanged indicates whether the most recent iterator positioning 1656 // operation resulted in the iterator stepping into or out of a new range key. 1657 // If true, previously returned range key bounds and data have been invalidated. 1658 // If false, previously obtained range key bounds, suffix and value slices are 1659 // still valid and may continue to be read. 1660 // 1661 // Invalid iterator positions are considered to not hold range keys, meaning 1662 // that if an iterator steps from an IterExhausted or IterAtLimit position onto 1663 // a position with a range key, RangeKeyChanged will yield true. 1664 func (i *Iterator) RangeKeyChanged() bool { 1665 return i.iterValidityState == IterValid && i.rangeKey != nil && i.rangeKey.updated 1666 } 1667 1668 // HasPointAndRange indicates whether there exists a point key, a range key or 1669 // both at the current iterator position. 1670 func (i *Iterator) HasPointAndRange() (hasPoint, hasRange bool) { 1671 if i.iterValidityState != IterValid || i.requiresReposition { 1672 return false, false 1673 } 1674 if i.opts.KeyTypes == IterKeyTypePointsOnly { 1675 return true, false 1676 } 1677 return i.rangeKey == nil || !i.rangeKey.rangeKeyOnly, i.rangeKey != nil && i.rangeKey.hasRangeKey 1678 } 1679 1680 // RangeBounds returns the start (inclusive) and end (exclusive) bounds of the 1681 // range key covering the current iterator position. RangeBounds returns nil 1682 // bounds if there is no range key covering the current iterator position, or 1683 // the iterator is not configured to surface range keys. 1684 func (i *Iterator) RangeBounds() (start, end []byte) { 1685 if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey { 1686 return nil, nil 1687 } 1688 return i.rangeKey.start, i.rangeKey.end 1689 } 1690 1691 // Key returns the key of the current key/value pair, or nil if done. The 1692 // caller should not modify the contents of the returned slice, and its 1693 // contents may change on the next call to Next. 1694 func (i *Iterator) Key() []byte { 1695 return i.key 1696 } 1697 1698 // Value returns the value of the current key/value pair, or nil if done. The 1699 // caller should not modify the contents of the returned slice, and its 1700 // contents may change on the next call to Next. 1701 // 1702 // Only valid if HasPointAndRange() returns true for hasPoint. 1703 func (i *Iterator) Value() []byte { 1704 return i.value 1705 } 1706 1707 // RangeKeys returns the range key values and their suffixes covering the 1708 // current iterator position. The range bounds may be retrieved separately 1709 // through Iterator.RangeBounds().
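//
// A usage sketch combining HasPointAndRange, RangeBounds and RangeKeys
// (assuming `it` is a valid Iterator configured to surface range keys):
//
//	hasPoint, hasRange := it.HasPointAndRange()
//	if hasPoint {
//		_, _ = it.Key(), it.Value()
//	}
//	if hasRange {
//		start, end := it.RangeBounds()
//		for _, rk := range it.RangeKeys() {
//			_, _ = rk.Suffix, rk.Value
//		}
//		_, _ = start, end
//	}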
1710 func (i *Iterator) RangeKeys() []RangeKeyData { 1711 if i.rangeKey == nil || !i.opts.rangeKeys() || !i.rangeKey.hasRangeKey { 1712 return nil 1713 } 1714 return i.rangeKey.keys 1715 } 1716 1717 // Valid returns true if the iterator is positioned at a valid key/value pair 1718 // and false otherwise. 1719 func (i *Iterator) Valid() bool { 1720 return i.iterValidityState == IterValid && !i.requiresReposition 1721 } 1722 1723 // Error returns any accumulated error. 1724 func (i *Iterator) Error() error { 1725 if i.iter != nil { 1726 return firstError(i.err, i.iter.Error()) 1727 } 1728 return i.err 1729 } 1730 1731 // Close closes the iterator and returns any accumulated error. Exhausting 1732 // all the key/value pairs in a table is not considered to be an error. 1733 // It is not valid to call any method, including Close, after the iterator 1734 // has been closed. 1735 func (i *Iterator) Close() error { 1736 // Close the child iterator before releasing the readState because when the 1737 // readState is released sstables referenced by the readState may be deleted 1738 // which will fail on Windows if the sstables are still open by the child 1739 // iterator. 1740 if i.iter != nil { 1741 i.err = firstError(i.err, i.iter.Close()) 1742 1743 // Closing i.iter did not necessarily close the point and range key 1744 // iterators. Calls to SetOptions may have 'disconnected' either one 1745 // from i.iter if iteration key types were changed. Both point and range 1746 // key iterators are preserved in case the iterator needs to switch key 1747 // types again. We explicitly close both of these iterators here. 1748 // 1749 // NB: If the iterators were still connected to i.iter, they may be 1750 // closed, but calling Close on a closed internal iterator or fragment 1751 // iterator is allowed. 1752 if i.pointIter != nil && !i.closePointIterOnce { 1753 i.err = firstError(i.err, i.pointIter.Close()) 1754 } 1755 if i.rangeKey != nil && i.rangeKey.rangeKeyIter != nil { 1756 i.err = firstError(i.err, i.rangeKey.rangeKeyIter.Close()) 1757 } 1758 } 1759 err := i.err 1760 1761 if i.readState != nil { 1762 if i.readSampling.pendingCompactions.size > 0 { 1763 // Copy pending read compactions using db.mu.Lock() 1764 i.readState.db.mu.Lock() 1765 i.readState.db.mu.compact.readCompactions.combine(&i.readSampling.pendingCompactions, i.cmp) 1766 reschedule := i.readState.db.mu.compact.rescheduleReadCompaction 1767 i.readState.db.mu.compact.rescheduleReadCompaction = false 1768 concurrentCompactions := i.readState.db.mu.compact.compactingCount 1769 i.readState.db.mu.Unlock() 1770 1771 if reschedule && concurrentCompactions == 0 { 1772 // In a read heavy workload, flushes may not happen frequently enough to 1773 // schedule compactions. 1774 i.readState.db.compactionSchedulers.Add(1) 1775 go i.readState.db.maybeScheduleCompactionAsync() 1776 } 1777 } 1778 1779 i.readState.unref() 1780 i.readState = nil 1781 } 1782 1783 for _, readers := range i.externalReaders { 1784 for _, r := range readers { 1785 err = firstError(err, r.Close()) 1786 } 1787 } 1788 1789 // Close the closer for the current value if one was open. 1790 if i.valueCloser != nil { 1791 err = firstError(err, i.valueCloser.Close()) 1792 i.valueCloser = nil 1793 } 1794 1795 const maxKeyBufCacheSize = 4 << 10 // 4 KB 1796 1797 if i.rangeKey != nil { 1798 // Avoid caching the key buf if it is overly large. The constant is 1799 // fairly arbitrary. 
1800 if cap(i.rangeKey.buf) >= maxKeyBufCacheSize { 1801 i.rangeKey.buf = nil 1802 } 1803 *i.rangeKey = iteratorRangeKeyState{buf: i.rangeKey.buf} 1804 iterRangeKeyStateAllocPool.Put(i.rangeKey) 1805 i.rangeKey = nil 1806 } 1807 if alloc := i.alloc; alloc != nil { 1808 // Avoid caching the key buf if it is overly large. The constant is fairly 1809 // arbitrary. 1810 if cap(i.keyBuf) >= maxKeyBufCacheSize { 1811 alloc.keyBuf = nil 1812 } else { 1813 alloc.keyBuf = i.keyBuf 1814 } 1815 if cap(i.prefixOrFullSeekKey) >= maxKeyBufCacheSize { 1816 alloc.prefixOrFullSeekKey = nil 1817 } else { 1818 alloc.prefixOrFullSeekKey = i.prefixOrFullSeekKey 1819 } 1820 for j := range i.boundsBuf { 1821 if cap(i.boundsBuf[j]) >= maxKeyBufCacheSize { 1822 alloc.boundsBuf[j] = nil 1823 } else { 1824 alloc.boundsBuf[j] = i.boundsBuf[j] 1825 } 1826 } 1827 *alloc = iterAlloc{ 1828 keyBuf: alloc.keyBuf, 1829 boundsBuf: alloc.boundsBuf, 1830 prefixOrFullSeekKey: alloc.prefixOrFullSeekKey, 1831 } 1832 iterAllocPool.Put(alloc) 1833 } else if alloc := i.getIterAlloc; alloc != nil { 1834 if cap(i.keyBuf) >= maxKeyBufCacheSize { 1835 alloc.keyBuf = nil 1836 } else { 1837 alloc.keyBuf = i.keyBuf 1838 } 1839 *alloc = getIterAlloc{ 1840 keyBuf: alloc.keyBuf, 1841 } 1842 getIterAllocPool.Put(alloc) 1843 } 1844 return err 1845 } 1846 1847 // SetBounds sets the lower and upper bounds for the iterator. Once SetBounds 1848 // returns, the caller is free to mutate the provided slices. 1849 // 1850 // The iterator will always be invalidated and must be repositioned with a call 1851 // to SeekGE, SeekPrefixGE, SeekLT, First, or Last. 1852 func (i *Iterator) SetBounds(lower, upper []byte) { 1853 // Ensure that the Iterator appears exhausted, regardless of whether we 1854 // actually have to invalidate the internal iterator. Optimizations that 1855 // avoid exhaustion are an internal implementation detail that shouldn't 1856 // leak through the interface. The caller should still call an absolute 1857 // positioning method to reposition the iterator. 1858 i.requiresReposition = true 1859 1860 if ((i.opts.LowerBound == nil) == (lower == nil)) && 1861 ((i.opts.UpperBound == nil) == (upper == nil)) && 1862 i.equal(i.opts.LowerBound, lower) && 1863 i.equal(i.opts.UpperBound, upper) { 1864 // Unchanged, noop. 1865 return 1866 } 1867 1868 // Copy the user-provided bounds into an Iterator-owned buffer, and set them 1869 // on i.opts.{Lower,Upper}Bound. 1870 i.saveBounds(lower, upper) 1871 1872 i.iter.SetBounds(i.opts.LowerBound, i.opts.UpperBound) 1873 // If the iterator has an open point iterator that's not currently being 1874 // used, propagate the new bounds to it. 1875 if i.pointIter != nil && !i.opts.pointKeys() { 1876 i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound) 1877 } 1878 // If the iterator has a range key iterator, propagate bounds to it. The 1879 // top-level SetBounds on the interleaving iterator (i.iter) won't propagate 1880 // bounds to the range key iterator stack, because the FragmentIterator 1881 // interface doesn't define a SetBounds method. We need to directly inform 1882 // the iterConfig stack. 1883 if i.rangeKey != nil { 1884 i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound) 1885 } 1886 1887 // Even though this is not a positioning operation, the alteration of the 1888 // bounds means we cannot optimize Seeks by using Next. 1889 i.invalidate() 1890 } 1891 1892 func (i *Iterator) saveBounds(lower, upper []byte) { 1893 // Copy the user-provided bounds into an Iterator-owned buffer. 
We can't 1894 // overwrite the current bounds, because some internal iterators compare old 1895 // and new bounds for optimizations. 1896 1897 buf := i.boundsBuf[i.boundsBufIdx][:0] 1898 if lower != nil { 1899 buf = append(buf, lower...) 1900 i.opts.LowerBound = buf 1901 } else { 1902 i.opts.LowerBound = nil 1903 } 1904 if upper != nil { 1905 buf = append(buf, upper...) 1906 i.opts.UpperBound = buf[len(buf)-len(upper):] 1907 } else { 1908 i.opts.UpperBound = nil 1909 } 1910 i.boundsBuf[i.boundsBufIdx] = buf 1911 i.boundsBufIdx = 1 - i.boundsBufIdx 1912 } 1913 1914 // SetOptions sets new iterator options for the iterator. Note that the lower 1915 // and upper bounds applied here will supersede any bounds set by previous calls 1916 // to SetBounds. 1917 // 1918 // Note that the slices provided in this SetOptions must not be changed by the 1919 // caller until the iterator is closed, or a subsequent SetBounds or SetOptions 1920 // has returned. This is because comparisons between the existing and new bounds 1921 // are sometimes used to optimize seeking. See the extended commentary on 1922 // SetBounds. 1923 // 1924 // If the iterator was created over an indexed mutable batch, the iterator's 1925 // view of the mutable batch is refreshed. 1926 // 1927 // The iterator will always be invalidated and must be repositioned with a call 1928 // to SeekGE, SeekPrefixGE, SeekLT, First, or Last. 1929 // 1930 // If only lower and upper bounds need to be modified, prefer SetBounds. 1931 func (i *Iterator) SetOptions(o *IterOptions) { 1932 if i.externalReaders != nil { 1933 if err := validateExternalIterOpts(o); err != nil { 1934 panic(err) 1935 } 1936 } 1937 1938 // Ensure that the Iterator appears exhausted, regardless of whether we 1939 // actually have to invalidate the internal iterator. Optimizations that 1940 // avoid exhaustion are an internal implementation detail that shouldn't 1941 // leak through the interface. The caller should still call an absolute 1942 // positioning method to reposition the iterator. 1943 i.requiresReposition = true 1944 1945 // Check if global state requires we close all internal iterators. 1946 // 1947 // If the Iterator is in an error state, invalidate the existing iterators 1948 // so that we reconstruct an iterator state from scratch. 1949 // 1950 // If OnlyReadGuaranteedDurable changed, the iterator stacks are incorrect, 1951 // improperly including or excluding memtables. Invalidate them so that 1952 // finishInitializingIter will reconstruct them. 1953 // 1954 // If either the original options or the new options specify a table filter, 1955 // we need to reconstruct the iterator stacks. If they both supply a table 1956 // filter, we can't be certain that it's the same filter since we have no 1957 // mechanism to compare the filter closures. 1958 closeBoth := i.err != nil || 1959 o.OnlyReadGuaranteedDurable != i.opts.OnlyReadGuaranteedDurable || 1960 o.TableFilter != nil || i.opts.TableFilter != nil 1961 1962 // If either options specify block property filters for an iterator stack, 1963 // reconstruct it. 
1964 if i.pointIter != nil && (closeBoth || len(o.PointKeyFilters) > 0 || len(i.opts.PointKeyFilters) > 0 || 1965 o.RangeKeyMasking.Filter != nil || i.opts.RangeKeyMasking.Filter != nil) { 1966 i.err = firstError(i.err, i.pointIter.Close()) 1967 i.pointIter = nil 1968 } 1969 if i.rangeKey != nil { 1970 if closeBoth || len(o.RangeKeyFilters) > 0 || len(i.opts.RangeKeyFilters) > 0 { 1971 i.err = firstError(i.err, i.rangeKey.rangeKeyIter.Close()) 1972 i.rangeKey = nil 1973 } else { 1974 // If there's still a range key iterator stack, invalidate the 1975 // iterator. This ensures RangeKeyChanged() returns true if a 1976 // subsequent positioning operation discovers a range key. It also 1977 // prevents seek no-op optimizations. 1978 i.invalidate() 1979 } 1980 } 1981 1982 // If the iterator is backed by a batch that's been mutated, refresh its 1983 // existing point and range-key iterators, and invalidate the iterator to 1984 // prevent seek-using-next optimizations. If we don't yet have a point-key 1985 // iterator or range-key iterator but we require one, it'll be created in 1986 // the slow path that reconstructs the iterator in finishInitializingIter. 1987 if i.batch != nil { 1988 nextBatchSeqNum := (uint64(len(i.batch.data)) | base.InternalKeySeqNumBatch) 1989 if nextBatchSeqNum != i.batchSeqNum { 1990 i.batchSeqNum = nextBatchSeqNum 1991 if i.pointIter != nil { 1992 if i.batch.countRangeDels == 0 { 1993 // No range deletions exist in the batch. We only need to 1994 // update the batchIter's snapshot. 1995 i.batchPointIter.snapshot = nextBatchSeqNum 1996 i.invalidate() 1997 } else if i.batchRangeDelIter.Count() == 0 { 1998 // When we constructed this iterator, there were no 1999 // rangedels in the batch. Iterator construction will have 2000 // excluded the batch rangedel iterator from the point 2001 // iterator stack. We need to reconstruct the point iterator 2002 // to add i.batchRangeDelIter into the iterator stack. 2003 i.err = firstError(i.err, i.pointIter.Close()) 2004 i.pointIter = nil 2005 } else { 2006 // There are range deletions in the batch and we already 2007 // have a batch rangedel iterator. We can update the batch 2008 // rangedel iterator in place. 2009 // 2010 // NB: There may or may not be new range deletions. We can't 2011 // tell based on i.batchRangeDelIter.Count(), which is the 2012 // count of fragmented range deletions, NOT the number of 2013 // range deletions written to the batch 2014 // [i.batch.countRangeDels]. 2015 i.batchPointIter.snapshot = nextBatchSeqNum 2016 i.batch.initRangeDelIter(&i.opts, &i.batchRangeDelIter, nextBatchSeqNum) 2017 i.invalidate() 2018 } 2019 } 2020 if i.rangeKey != nil && i.batch.countRangeKeys > 0 { 2021 if i.batchRangeKeyIter.Count() == 0 { 2022 // When we constructed this iterator, there were no range 2023 // keys in the batch. Iterator construction will have 2024 // excluded the batch rangekey iterator from the range key 2025 // iterator stack. We need to reconstruct the range key 2026 // iterator to add i.batchRangeKeyIter into the iterator 2027 // stack. 2028 i.err = firstError(i.err, i.rangeKey.rangeKeyIter.Close()) 2029 i.rangeKey = nil 2030 } else { 2031 // There are range keys in the batch and we already 2032 // have a batch rangekey iterator. We can update the batch 2033 // rangekey iterator in place. 2034 // 2035 // NB: There may or may not be new range keys. 
We can't 2036 // tell based on i.batchRangeKeyIter.Count(), which is the 2037 // count of fragmented range keys, NOT the number of 2038 // range keys written to the batch [i.batch.countRangeKeys]. 2039 i.batch.initRangeKeyIter(&i.opts, &i.batchRangeKeyIter, nextBatchSeqNum) 2040 i.invalidate() 2041 } 2042 } 2043 } 2044 } 2045 2046 // Reset combinedIterState.initialized in case the iterator key types 2047 // changed. If there's already a range key iterator stack, the combined 2048 // iterator is already initialized. Additionally, if the iterator is not 2049 // configured to include range keys, mark it as initialized to signal that 2050 // lower level iterators should not trigger a switch to combined iteration. 2051 i.lazyCombinedIter.combinedIterState = combinedIterState{ 2052 initialized: i.rangeKey != nil || !i.opts.rangeKeys(), 2053 } 2054 2055 boundsEqual := ((i.opts.LowerBound == nil) == (o.LowerBound == nil)) && 2056 ((i.opts.UpperBound == nil) == (o.UpperBound == nil)) && 2057 i.equal(i.opts.LowerBound, o.LowerBound) && 2058 i.equal(i.opts.UpperBound, o.UpperBound) 2059 2060 if boundsEqual && o.KeyTypes == i.opts.KeyTypes && 2061 (i.pointIter != nil || !i.opts.pointKeys()) && 2062 (i.rangeKey != nil || !i.opts.rangeKeys() || i.opts.KeyTypes == IterKeyTypePointsAndRanges) && 2063 i.equal(o.RangeKeyMasking.Suffix, i.opts.RangeKeyMasking.Suffix) && 2064 o.UseL6Filters == i.opts.UseL6Filters { 2065 // The options are identical, so we can likely use the fast path. In 2066 // addition to all the above constraints, we cannot use the fast path if 2067 // configured to perform lazy combined iteration but an indexed batch 2068 // used by the iterator now contains range keys. Lazy combined iteration 2069 // is not compatible with batch range keys because we always need to 2070 // merge the batch's range keys into iteration. 2071 if i.rangeKey != nil || !i.opts.rangeKeys() || i.batch == nil || i.batch.countRangeKeys == 0 { 2072 // Fast path. This preserves the Seek-using-Next optimizations as 2073 // long as the iterator wasn't already invalidated up above. 2074 return 2075 } 2076 } 2077 // Slow path. 2078 2079 // The options changed. Save the new ones to i.opts. 2080 if boundsEqual { 2081 // Copying the options into i.opts will overwrite LowerBound and 2082 // UpperBound fields with the user-provided slices. We need to hold on 2083 // to the Pebble-owned slices, so save them and re-set them after the 2084 // copy. 2085 lower, upper := i.opts.LowerBound, i.opts.UpperBound 2086 i.opts = *o 2087 i.opts.LowerBound, i.opts.UpperBound = lower, upper 2088 } else { 2089 i.opts = *o 2090 i.saveBounds(o.LowerBound, o.UpperBound) 2091 // Propagate the changed bounds to the existing point iterator. 2092 // NB: We propagate i.opts.{Lower,Upper}Bound, not o.{Lower,Upper}Bound 2093 // because i.opts now point to buffers owned by Pebble. 2094 if i.pointIter != nil { 2095 i.pointIter.SetBounds(i.opts.LowerBound, i.opts.UpperBound) 2096 } 2097 if i.rangeKey != nil { 2098 i.rangeKey.iterConfig.SetBounds(i.opts.LowerBound, i.opts.UpperBound) 2099 } 2100 } 2101 2102 // Even though this is not a positioning operation, the invalidation of the 2103 // iterator stack means we cannot optimize Seeks by using Next. 2104 i.invalidate() 2105 2106 // Iterators created through NewExternalIter have a different iterator 2107 // initialization process. 
2108 if i.externalReaders != nil { 2109 finishInitializingExternal(i) 2110 return 2111 } 2112 finishInitializingIter(i.alloc) 2113 } 2114 2115 func (i *Iterator) invalidate() { 2116 i.lastPositioningOp = unknownLastPositionOp 2117 i.hasPrefix = false 2118 i.iterKey = nil 2119 i.iterValue = nil 2120 i.err = nil 2121 // This switch statement isn't necessary for correctness since callers 2122 // should call a repositioning method. We could have arbitrarily set i.pos 2123 // to one of the values. But it results in more intuitive behavior in 2124 // tests, which do not always reposition. 2125 switch i.pos { 2126 case iterPosCurForward, iterPosNext, iterPosCurForwardPaused: 2127 i.pos = iterPosCurForward 2128 case iterPosCurReverse, iterPosPrev, iterPosCurReversePaused: 2129 i.pos = iterPosCurReverse 2130 } 2131 i.iterValidityState = IterExhausted 2132 if i.rangeKey != nil { 2133 i.rangeKey.iiter.Invalidate() 2134 } 2135 } 2136 2137 // Metrics returns per-iterator metrics. 2138 func (i *Iterator) Metrics() IteratorMetrics { 2139 m := IteratorMetrics{ 2140 ReadAmp: 1, 2141 } 2142 if mi, ok := i.iter.(*mergingIter); ok { 2143 m.ReadAmp = len(mi.levels) 2144 } 2145 return m 2146 } 2147 2148 // ResetStats resets the stats to 0. 2149 func (i *Iterator) ResetStats() { 2150 i.stats = IteratorStats{} 2151 } 2152 2153 // Stats returns the current stats. 2154 func (i *Iterator) Stats() IteratorStats { 2155 return i.stats 2156 } 2157 2158 // CloneOptions configures an iterator constructed through Iterator.Clone. 2159 type CloneOptions struct { 2160 // IterOptions, if non-nil, define the iterator options to configure a 2161 // cloned iterator. If nil, the clone adopts the same IterOptions as the 2162 // iterator being cloned. 2163 IterOptions *IterOptions 2164 // RefreshBatchView may be set to true when cloning an Iterator over an 2165 // indexed batch. When false, the clone adopts the same (possibly stale) 2166 // view of the indexed batch as the cloned Iterator. When true, the clone is 2167 // constructed with a refreshed view of the batch, observing all of the 2168 // batch's mutations at the time of the Clone. If the cloned iterator was 2169 // not constructed to read over an indexed batch, RefreshBatchView has no 2170 // effect. 2171 RefreshBatchView bool 2172 } 2173 2174 // Clone creates a new Iterator over the same underlying data, i.e., over the 2175 // same {batch, memtables, sstables}. The resulting iterator is not positioned. 2176 // It starts with the same IterOptions, unless opts.IterOptions is set. 2177 // 2178 // When called on an Iterator over an indexed batch, the clone's visibility of 2179 // the indexed batch is determined by CloneOptions.RefreshBatchView. If false, 2180 // the clone inherits the iterator's current (possibly stale) view of the batch, 2181 // and callers may call SetOptions to subsequently refresh the clone's view to 2182 // include all batch mutations. If true, the clone is constructed with a 2183 // complete view of the indexed batch's mutations at the time of the Clone. 2184 // 2185 // Callers can use Clone if they need multiple iterators that see 2186 // exactly the same underlying state of the DB. This should not be used to 2187 // extend the lifetime of the data backing the original Iterator since that 2188 // will cause an increase in memory and disk usage (use NewSnapshot for that 2189 // purpose).
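//
// A usage sketch (assuming `it` is an open Iterator over an indexed batch):
//
//	clone, err := it.Clone(CloneOptions{RefreshBatchView: true})
//	if err != nil {
//		return err // e.g. Clone was called on an already-closed Iterator
//	}
//	defer clone.Close()
//	for valid := clone.First(); valid; valid = clone.Next() {
//		// the clone observes the batch's mutations as of the Clone call
//	}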
2190 func (i *Iterator) Clone(opts CloneOptions) (*Iterator, error) { 2191 if opts.IterOptions == nil { 2192 opts.IterOptions = &i.opts 2193 } 2194 2195 readState := i.readState 2196 if readState == nil { 2197 return nil, errors.Errorf("cannot Clone a closed Iterator") 2198 } 2199 // i is already holding a ref, so there is no race with unref here. 2200 readState.ref() 2201 // Bundle various structures under a single umbrella in order to allocate 2202 // them together. 2203 buf := iterAllocPool.Get().(*iterAlloc) 2204 dbi := &buf.dbi 2205 *dbi = Iterator{ 2206 opts: *opts.IterOptions, 2207 alloc: buf, 2208 merge: i.merge, 2209 comparer: i.comparer, 2210 readState: readState, 2211 keyBuf: buf.keyBuf, 2212 prefixOrFullSeekKey: buf.prefixOrFullSeekKey, 2213 boundsBuf: buf.boundsBuf, 2214 batch: i.batch, 2215 batchSeqNum: i.batchSeqNum, 2216 newIters: i.newIters, 2217 newIterRangeKey: i.newIterRangeKey, 2218 seqNum: i.seqNum, 2219 } 2220 dbi.saveBounds(dbi.opts.LowerBound, dbi.opts.UpperBound) 2221 2222 // If the caller requested the clone have a current view of the indexed 2223 // batch, set the clone's batch sequence number appropriately. 2224 if i.batch != nil && opts.RefreshBatchView { 2225 dbi.batchSeqNum = (uint64(len(i.batch.data)) | base.InternalKeySeqNumBatch) 2226 } 2227 2228 return finishInitializingIter(buf), nil 2229 } 2230 2231 func (stats *IteratorStats) String() string { 2232 return redact.StringWithoutMarkers(stats) 2233 } 2234 2235 // SafeFormat implements the redact.SafeFormatter interface. 2236 func (stats *IteratorStats) SafeFormat(s redact.SafePrinter, verb rune) { 2237 for i := range stats.ForwardStepCount { 2238 switch IteratorStatsKind(i) { 2239 case InterfaceCall: 2240 s.SafeString("(interface (dir, seek, step): ") 2241 case InternalIterCall: 2242 s.SafeString(", (internal (dir, seek, step): ") 2243 } 2244 s.Printf("(fwd, %d, %d), (rev, %d, %d))", 2245 redact.Safe(stats.ForwardSeekCount[i]), redact.Safe(stats.ForwardStepCount[i]), 2246 redact.Safe(stats.ReverseSeekCount[i]), redact.Safe(stats.ReverseStepCount[i])) 2247 } 2248 if stats.InternalStats != (InternalIteratorStats{}) { 2249 s.SafeString(",\n(internal-stats: ") 2250 s.Printf("(block-bytes: (total %s, cached %s)), "+ 2251 "(points: (count %s, key-bytes %s, value-bytes %s, tombstoned: %s))", 2252 humanize.IEC.Uint64(stats.InternalStats.BlockBytes), 2253 humanize.IEC.Uint64(stats.InternalStats.BlockBytesInCache), 2254 humanize.SI.Uint64(stats.InternalStats.PointCount), 2255 humanize.SI.Uint64(stats.InternalStats.KeyBytes), 2256 humanize.SI.Uint64(stats.InternalStats.ValueBytes), 2257 humanize.SI.Uint64(stats.InternalStats.PointsCoveredByRangeTombstones), 2258 ) 2259 } 2260 }
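// A usage sketch of the stats facilities above (assuming `it` is an open
// Iterator): ResetStats clears the counters, Stats snapshots them, and
// String/SafeFormat render them for logging.
//
//	it.ResetStats()
//	for valid := it.First(); valid; valid = it.Next() {
//		// forward seek and step counts accumulate as the scan proceeds
//	}
//	stats := it.Stats()
//	_ = stats.String() // rendered in the layout produced by SafeFormat above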