github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/reader_iter.go

// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"fmt"
	"os"
	"sync"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/invariants"
)

// Iterator iterates over an entire table of data.
type Iterator interface {
	base.InternalIterator

	// NextPrefix implements (base.InternalIterator).NextPrefix.
	NextPrefix(succKey []byte) (*InternalKey, base.LazyValue)

	// MaybeFilteredKeys may be called when an iterator is exhausted to indicate
	// whether or not the last positioning method may have skipped any keys due
	// to block-property filters. This is used by the Pebble levelIter to
	// control when an iterator steps to the next sstable.
	//
	// MaybeFilteredKeys may always return false positives, that is, it may
	// return true when no keys were filtered. It should only be called when the
	// iterator is exhausted. It must never return false negatives when the
	// iterator is exhausted.
	MaybeFilteredKeys() bool

	SetCloseHook(fn func(i Iterator) error)
}

// Iterator positioning optimizations and singleLevelIterator and
// twoLevelIterator:
//
// An iterator is absolutely positioned using one of the Seek or First or Last
// calls. After absolute positioning, there can be relative positioning done
// by stepping using Prev or Next.
//
// We implement optimizations below where an absolute positioning call can in
// some cases use the current position to do less work. To understand these,
// we first define some terms. An iterator is bounds-exhausted if the bounds
// (upper or lower) have been reached. An iterator is data-exhausted if it has
// reached the end of the data (forward or reverse) in the sstable. A
// singleLevelIterator only knows a local-data-exhausted property, since when
// it is used as part of a twoLevelIterator, the twoLevelIterator can step to
// the next lower-level index block.
//
// The bounds-exhausted property is tracked by
// singleLevelIterator.exhaustedBounds being +1 (upper bound reached) or -1
// (lower bound reached). The same field is reused by twoLevelIterator. Either
// may notice the exhaustion of the bound and set it. Note that if
// singleLevelIterator sets this property, it is not a local property (since
// the bound has been reached regardless of whether this is in the context of
// the twoLevelIterator or not).
//
// The data-exhausted property is tracked in a more subtle manner. We define
// two predicates:
// - partial-local-data-exhausted (PLDE):
//   i.data.isDataInvalidated() || !i.data.valid()
// - partial-global-data-exhausted (PGDE):
//   i.index.isDataInvalidated() || !i.index.valid() || i.data.isDataInvalidated() ||
//   !i.data.valid()
//
// PLDE is defined for a singleLevelIterator. PGDE is defined for a
// twoLevelIterator. Oddly, in our code below the singleLevelIterator does not
// know when it is part of a twoLevelIterator, so it does not know whether its
// property is local or global.
//
// Now to define data-exhausted:
// - Prerequisite: we must know that the iterator has been positioned and
//   i.err is nil.
// - bounds-exhausted must not be true:
//   If bounds-exhausted is true, we have incomplete knowledge of
//   data-exhausted, since PLDE or PGDE could be true because we could have
//   chosen not to load the index block or data block and figured out that the
//   bound is exhausted (due to block property filters filtering out index and
//   data blocks and going past the bound on the top-level index block). Note
//   that if we tried to separate out the BPF case from others, we could
//   develop more knowledge here.
// - PGDE is true for twoLevelIterator. PLDE is true if it is a standalone
//   singleLevelIterator. !PLDE or !PGDE of course imply that data-exhausted
//   is not true.
//
// An implication of the above is that if we are going to somehow utilize
// knowledge of data-exhausted in an optimization, we must not forget the
// existing value of bounds-exhausted, since by forgetting the latter we can
// erroneously think that data-exhausted is true. Bug #2036 was due to this
// forgetting.
//
// Now to the two categories of optimizations we currently have:
// - Monotonic bounds optimizations that reuse the prior iterator position
//   when doing a seek: These only work with !data-exhausted. We could choose
//   to make these work with data-exhausted but have not bothered, because in
//   the context of a DB, if data-exhausted were true the DB would move to the
//   next file in the level. Note that this behavior of moving to the next
//   file is not necessarily true for L0 files, so there could be some benefit
//   in the future in this optimization. See the WARNING-data-exhausted
//   comments if trying to optimize this in the future.
// - TrySeekUsingNext optimizations: these work regardless of exhaustion
//   state.
//
// Implementation detail: In the code, PLDE only checks
// i.data.isDataInvalidated(). This narrower check is safe, since it is a
// subset of the set expressed by the OR expression. It is also not a
// de-optimization, since whenever we exhaust the iterator we explicitly call
// i.data.invalidate(). PGDE checks i.index.isDataInvalidated() &&
// i.data.isDataInvalidated(). Again, this narrower check is safe, and not a
// de-optimization, since whenever we exhaust the iterator we explicitly call
// i.index.invalidate() and i.data.invalidate(). The && is questionable -- for
// now this is a bit of defensive code. We should seriously consider removing
// it, since defensive code suggests we are not confident about our invariants
// (and if we are not confident, we need more invariant assertions, not
// defensive code).
//
// TODO(sumeer): remove the aforementioned defensive code.

var singleLevelIterPool = sync.Pool{
	New: func() interface{} {
		i := &singleLevelIterator{}
		// Note: this is a no-op if invariants are disabled or race is enabled.
		invariants.SetFinalizer(i, checkSingleLevelIterator)
		return i
	},
}

var twoLevelIterPool = sync.Pool{
	New: func() interface{} {
		i := &twoLevelIterator{}
		// Note: this is a no-op if invariants are disabled or race is enabled.
		invariants.SetFinalizer(i, checkTwoLevelIterator)
		return i
	},
}
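// The pools above let iterator construction avoid an allocation on hot paths.
// The sketch below is purely illustrative and not part of this package's API:
// it only assumes that whoever takes an iterator from a pool fully resets it
// before returning it for reuse, so that no block handles remain held (which
// is exactly what the finalizer checks below verify under the invariants
// build).
//
//	// Hypothetical acquire/release cycle for a pooled iterator.
//	i := singleLevelIterPool.Get().(*singleLevelIterator)
//	// ... initialize i against a reader, set bounds, iterate ...
//	// Before handing the iterator back, clear all state so no buffers or
//	// cache handles are retained across reuse.
//	*i = singleLevelIterator{}
//	singleLevelIterPool.Put(i)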
// TODO(jackson): rangedel fragmentBlockIters can't be pooled because of some
// code paths that double Close the iters. Fix the double close and pool the
// *fragmentBlockIter type directly.

var rangeKeyFragmentBlockIterPool = sync.Pool{
	New: func() interface{} {
		i := &rangeKeyFragmentBlockIter{}
		// Note: this is a no-op if invariants are disabled or race is enabled.
		invariants.SetFinalizer(i, checkRangeKeyFragmentBlockIterator)
		return i
	},
}

func checkSingleLevelIterator(obj interface{}) {
	i := obj.(*singleLevelIterator)
	if p := i.data.handle.Get(); p != nil {
		fmt.Fprintf(os.Stderr, "singleLevelIterator.data.handle is not nil: %p\n", p)
		os.Exit(1)
	}
	if p := i.index.handle.Get(); p != nil {
		fmt.Fprintf(os.Stderr, "singleLevelIterator.index.handle is not nil: %p\n", p)
		os.Exit(1)
	}
}

func checkTwoLevelIterator(obj interface{}) {
	i := obj.(*twoLevelIterator)
	if p := i.data.handle.Get(); p != nil {
		fmt.Fprintf(os.Stderr, "twoLevelIterator.data.handle is not nil: %p\n", p)
		os.Exit(1)
	}
	if p := i.index.handle.Get(); p != nil {
		fmt.Fprintf(os.Stderr, "twoLevelIterator.index.handle is not nil: %p\n", p)
		os.Exit(1)
	}
}

func checkRangeKeyFragmentBlockIterator(obj interface{}) {
	i := obj.(*rangeKeyFragmentBlockIter)
	if p := i.blockIter.handle.Get(); p != nil {
		fmt.Fprintf(os.Stderr, "fragmentBlockIter.blockIter.handle is not nil: %p\n", p)
		os.Exit(1)
	}
}

// compactionIterator is similar to Iterator but it increments the number of
// bytes that have been iterated through.
type compactionIterator struct {
	*singleLevelIterator
	bytesIterated *uint64
	prevOffset    uint64
}

// compactionIterator implements the base.InternalIterator interface.
var _ base.InternalIterator = (*compactionIterator)(nil)

func (i *compactionIterator) String() string {
	if i.vState != nil {
		return i.vState.fileNum.String()
	}
	return i.reader.fileNum.String()
}

func (i *compactionIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	panic("pebble: SeekGE unimplemented")
}

func (i *compactionIterator) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	panic("pebble: SeekPrefixGE unimplemented")
}

func (i *compactionIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	panic("pebble: SeekLT unimplemented")
}

func (i *compactionIterator) First() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	return i.skipForward(i.singleLevelIterator.First())
}

func (i *compactionIterator) Last() (*InternalKey, base.LazyValue) {
	panic("pebble: Last unimplemented")
}
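// Compactions position this iterator exactly once with First and then consume
// it with repeated Next calls, which is why the other positioning methods
// above simply panic. A minimal sketch of such a driving loop; the emit
// function is hypothetical and not part of this package:
//
//	for key, val := iter.First(); key != nil; key, val = iter.Next() {
//		emit(key, val) // hypothetical: write the kv pair to the compaction output
//	}
//	if err := iter.Error(); err != nil {
//		return err
//	}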
// Note: compactionIterator.Next mirrors the implementation of Iterator.Next
// for performance reasons. Keep the two in sync.
func (i *compactionIterator) Next() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.skipForward(i.data.Next())
}

func (i *compactionIterator) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	panic("pebble: NextPrefix unimplemented")
}

func (i *compactionIterator) Prev() (*InternalKey, base.LazyValue) {
	panic("pebble: Prev unimplemented")
}

func (i *compactionIterator) skipForward(
	key *InternalKey, val base.LazyValue,
) (*InternalKey, base.LazyValue) {
	if key == nil {
		for {
			if key, _ := i.index.Next(); key == nil {
				break
			}
			result := i.loadBlock(+1)
			if result != loadBlockOK {
				if i.err != nil {
					break
				}
				switch result {
				case loadBlockFailed:
					// We checked that i.index was at a valid entry, so
					// loadBlockFailed could not have happened due to i.index
					// being exhausted, and must be due to an error.
					panic("loadBlock should not have failed with no error")
				case loadBlockIrrelevant:
					panic("compactionIter should not be using block intervals for skipping")
				default:
					panic(fmt.Sprintf("unexpected case %d", result))
				}
			}
			// result == loadBlockOK
			if key, val = i.data.First(); key != nil {
				break
			}
		}
	}

	curOffset := i.recordOffset()
	*i.bytesIterated += uint64(curOffset - i.prevOffset)
	i.prevOffset = curOffset

	if i.vState != nil && key != nil {
		cmp := i.cmp(key.UserKey, i.vState.upper.UserKey)
		if cmp > 0 || (i.vState.upper.IsExclusiveSentinel() && cmp == 0) {
			return nil, base.LazyValue{}
		}
	}

	return key, val
}
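// For virtual sstables (i.vState != nil), skipForward additionally truncates
// iteration at the virtual upper bound. A worked illustration of the
// comparison above, with the keys chosen purely for the example:
//
//	// Hypothetical virtual bound: exclusive sentinel at user key "d".
//	// key "c": cmp < 0                        -> key is returned
//	// key "d": cmp == 0 && exclusive sentinel -> return nil (stop)
//	// key "e": cmp > 0                        -> return nil (stop)
//
// With an inclusive upper bound, the key at "d" would still be returned.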