// Source: github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/level_iter.go

// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package keyspan

import (
	"fmt"

	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/invariants"
	"github.com/cockroachdb/pebble/internal/manifest"
)

// LevelIter provides a merged view of spans from sstables in a level.
// It takes advantage of level invariants to only have one sstable span block
// open at one time, opened using the newIter function passed in.
type LevelIter struct {
	// cmp compares user keys; it is used both to seek among files and to
	// detect gaps between adjacent files' range key bounds.
	cmp base.Compare
	// Denotes the kind of key the level iterator should read. If the key type
	// is KeyTypePoint, the level iterator will read range tombstones (which
	// only affect point keys). If the key type is KeyTypeRange, the level
	// iterator will read range keys. It is invalid to configure an iterator
	// with the KeyTypePointAndRange key type.
	//
	// If key type is KeyTypePoint, no straddle spans are emitted between files,
	// and point key bounds are used to find files instead of range key bounds.
	//
	// TODO(bilal): Straddle spans can safely be produced in rangedel mode once
	// we can guarantee that we will never read sstables in a level that split
	// user keys across them. This might be guaranteed in a future release, but
	// as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
	// keyType == KeyTypePoint
	keyType manifest.KeyType
	// The LSM level this LevelIter is initialized for. Used in logging.
	level manifest.Level
	// The below fields are used to fill in gaps between adjacent files' range
	// key spaces. This is an optimization to avoid unnecessarily loading files
	// in cases where range keys are sparse and rare.
	//
	// dir is set by every positioning operation. straddleDir is set to dir
	// whenever a straddle span is synthesized, and it is non-zero only while
	// the last positioning operation returned a synthesized straddle span.
	// straddle holds that synthesized span.
	//
	// Note that when a straddle span is initialized, iterFile is modified to
	// point to the next file in the straddleDir direction. A change of direction
	// on a straddle key therefore necessitates the value of iterFile to be
	// reverted.
	dir         int
	straddle    Span
	straddleDir int
	// The iter for the current file (iterFile). It is nil under any of the
	// following conditions:
	// - files.Current() == nil
	// - err != nil
	// - straddleDir != 0, in which case iterFile is not nil and points to the
	//   next file (in the straddleDir direction).
	// - some other constraint, like the bounds in opts, caused the file at index to not
	//   be relevant to the iteration.
	iter FragmentIterator
	// iterFile holds the current file.
	// INVARIANT: iterFile = files.Current()
	iterFile *manifest.FileMetadata
	// newIter opens a span iterator over a single file; it is invoked with
	// the file and tableOpts each time a new file is loaded.
	newIter TableNewSpanIter
	// files iterates over the files of the level. Init stores the result of
	// files.Filter(keyType) here.
	files manifest.LevelIterator
	// err holds the most recent error produced by opening or closing a
	// per-file iterator. It is cleared by SeekGE, SeekLT, First and Last.
	err error

	// The options that were passed in.
	tableOpts SpanIterOptions

	// TODO(bilal): Add InternalIteratorStats.
}

// LevelIter implements the keyspan.FragmentIterator interface.
var _ FragmentIterator = (*LevelIter)(nil)

// NewLevelIter returns a LevelIter. It allocates a new LevelIter and
// initializes it via Init with the supplied arguments, so it panics under the
// same conditions as Init.
func NewLevelIter(
	opts SpanIterOptions,
	cmp base.Compare,
	newIter TableNewSpanIter,
	files manifest.LevelIterator,
	level manifest.Level,
	keyType manifest.KeyType,
) *LevelIter {
	l := &LevelIter{}
	l.Init(opts, cmp, newIter, files, level, keyType)
	return l
}

// Init initializes a LevelIter. It panics if keyType is neither
// manifest.KeyTypePoint nor manifest.KeyTypeRange.
91 func (l *LevelIter) Init( 92 opts SpanIterOptions, 93 cmp base.Compare, 94 newIter TableNewSpanIter, 95 files manifest.LevelIterator, 96 level manifest.Level, 97 keyType manifest.KeyType, 98 ) { 99 l.err = nil 100 l.level = level 101 l.tableOpts = opts 102 l.cmp = cmp 103 l.iterFile = nil 104 l.newIter = newIter 105 switch keyType { 106 case manifest.KeyTypePoint: 107 l.keyType = keyType 108 l.files = files.Filter(keyType) 109 case manifest.KeyTypeRange: 110 l.keyType = keyType 111 l.files = files.Filter(keyType) 112 default: 113 panic(fmt.Sprintf("unsupported key type: %v", keyType)) 114 } 115 } 116 117 func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata { 118 // Find the earliest file whose largest key is >= key. 119 // 120 // If the earliest file has its largest key == key and that largest key is a 121 // range deletion sentinel, we know that we manufactured this sentinel to convert 122 // the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum 123 // is the form of a range deletion sentinel which can contribute a largest key = end#sentinel). 124 // In this case we don't return this as the earliest file since there is nothing actually 125 // equal to key in it. 126 127 m := l.files.SeekGE(l.cmp, key) 128 for m != nil { 129 largestKey := m.LargestRangeKey 130 if l.keyType == manifest.KeyTypePoint { 131 largestKey = m.LargestPointKey 132 } 133 if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 { 134 break 135 } 136 m = l.files.Next() 137 } 138 return m 139 } 140 141 func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata { 142 // Find the last file whose smallest key is < key. 
143 return l.files.SeekLT(l.cmp, key) 144 } 145 146 type loadFileReturnIndicator int8 147 148 const ( 149 noFileLoaded loadFileReturnIndicator = iota 150 fileAlreadyLoaded 151 newFileLoaded 152 ) 153 154 func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator { 155 indicator := noFileLoaded 156 if l.iterFile == file { 157 if l.err != nil { 158 return noFileLoaded 159 } 160 if l.iter != nil { 161 // We are already at the file, but we would need to check for bounds. 162 // Set indicator accordingly. 163 indicator = fileAlreadyLoaded 164 } 165 // We were already at file, but don't have an iterator, probably because the file was 166 // beyond the iteration bounds. It may still be, but it is also possible that the bounds 167 // have changed. We handle that below. 168 } 169 170 // Note that LevelIter.Close() can be called multiple times. 171 if indicator != fileAlreadyLoaded { 172 if err := l.Close(); err != nil { 173 return noFileLoaded 174 } 175 } 176 177 l.iterFile = file 178 if file == nil { 179 return noFileLoaded 180 } 181 if indicator != fileAlreadyLoaded { 182 l.iter, l.err = l.newIter(file, l.tableOpts) 183 indicator = newFileLoaded 184 } 185 if l.err != nil { 186 return noFileLoaded 187 } 188 return indicator 189 } 190 191 // SeekGE implements keyspan.FragmentIterator. 192 func (l *LevelIter) SeekGE(key []byte) *Span { 193 l.dir = +1 194 l.straddle = Span{} 195 l.straddleDir = 0 196 l.err = nil // clear cached iteration error 197 198 f := l.findFileGE(key) 199 if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 { 200 // Peek at the previous file. 201 prevFile := l.files.Prev() 202 l.files.Next() 203 if prevFile != nil { 204 // We could unconditionally return an empty span between the seek key and 205 // f.SmallestRangeKey, however if this span is to the left of all range 206 // keys on this level, it could lead to inconsistent behaviour in relative 207 // positioning operations. 
Consider this example, with a b-c range key: 208 // 209 // SeekGE(a) -> a-b:{} 210 // Next() -> b-c{(#5,RANGEKEYSET,@4,foo)} 211 // Prev() -> nil 212 // 213 // Iterators higher up in the iterator stack rely on this sort of relative 214 // positioning consistency. 215 // 216 // TODO(bilal): Investigate ways to be able to return straddle spans in 217 // cases similar to the above, while still retaining correctness. 218 // Return a straddling key instead of loading the file. 219 l.iterFile = f 220 if err := l.Close(); err != nil { 221 return l.verify(nil) 222 } 223 l.straddleDir = +1 224 l.straddle = Span{ 225 Start: prevFile.LargestRangeKey.UserKey, 226 End: f.SmallestRangeKey.UserKey, 227 Keys: nil, 228 } 229 return l.verify(&l.straddle) 230 } 231 } 232 loadFileIndicator := l.loadFile(f, +1) 233 if loadFileIndicator == noFileLoaded { 234 return l.verify(nil) 235 } 236 if span := l.iter.SeekGE(key); span != nil { 237 return l.verify(span) 238 } 239 return l.skipEmptyFileForward() 240 } 241 242 // SeekLT implements keyspan.FragmentIterator. 243 func (l *LevelIter) SeekLT(key []byte) *Span { 244 l.dir = -1 245 l.straddle = Span{} 246 l.straddleDir = 0 247 l.err = nil // clear cached iteration error 248 249 f := l.findFileLT(key) 250 if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 { 251 // Peek at the next file. 252 nextFile := l.files.Next() 253 l.files.Prev() 254 if nextFile != nil { 255 // We could unconditionally return an empty span between f.LargestRangeKey 256 // and the seek key, however if this span is to the right of all range keys 257 // on this level, it could lead to inconsistent behaviour in relative 258 // positioning operations. Consider this example, with a b-c range key: 259 // 260 // SeekLT(d) -> c-d:{} 261 // Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)} 262 // Next() -> nil 263 // 264 // Iterators higher up in the iterator stack rely on this sort of relative 265 // positioning consistency. 
266 // 267 // TODO(bilal): Investigate ways to be able to return straddle spans in 268 // cases similar to the above, while still retaining correctness. 269 // Return a straddling key instead of loading the file. 270 l.iterFile = f 271 if err := l.Close(); err != nil { 272 return l.verify(nil) 273 } 274 l.straddleDir = -1 275 l.straddle = Span{ 276 Start: f.LargestRangeKey.UserKey, 277 End: nextFile.SmallestRangeKey.UserKey, 278 Keys: nil, 279 } 280 return l.verify(&l.straddle) 281 } 282 } 283 if l.loadFile(f, -1) == noFileLoaded { 284 return l.verify(nil) 285 } 286 if span := l.iter.SeekLT(key); span != nil { 287 return l.verify(span) 288 } 289 return l.skipEmptyFileBackward() 290 } 291 292 // First implements keyspan.FragmentIterator. 293 func (l *LevelIter) First() *Span { 294 l.dir = +1 295 l.straddle = Span{} 296 l.straddleDir = 0 297 l.err = nil // clear cached iteration error 298 299 if l.loadFile(l.files.First(), +1) == noFileLoaded { 300 return l.verify(nil) 301 } 302 if span := l.iter.First(); span != nil { 303 return l.verify(span) 304 } 305 return l.skipEmptyFileForward() 306 } 307 308 // Last implements keyspan.FragmentIterator. 309 func (l *LevelIter) Last() *Span { 310 l.dir = -1 311 l.straddle = Span{} 312 l.straddleDir = 0 313 l.err = nil // clear cached iteration error 314 315 if l.loadFile(l.files.Last(), -1) == noFileLoaded { 316 return l.verify(nil) 317 } 318 if span := l.iter.Last(); span != nil { 319 return l.verify(span) 320 } 321 return l.skipEmptyFileBackward() 322 } 323 324 // Next implements keyspan.FragmentIterator. 
func (l *LevelIter) Next() *Span {
	// A stored error, or an unpositioned iterator whose last movement was
	// forward (i.e. already exhausted at the end), yields nil.
	if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
		return l.verify(nil)
	}
	if l.iter == nil && l.iterFile == nil {
		// l.dir <= 0
		return l.First()
	}
	l.dir = +1

	if l.iter != nil {
		if span := l.iter.Next(); span != nil {
			return l.verify(span)
		}
	}
	// Either the current file is exhausted or no file iterator is open (for
	// example while positioned on a straddle span); move on to the next file.
	return l.skipEmptyFileForward()
}

// Prev implements keyspan.FragmentIterator.
func (l *LevelIter) Prev() *Span {
	// A stored error, or an unpositioned iterator whose last movement was
	// backward (i.e. already exhausted at the start), yields nil.
	if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
		return l.verify(nil)
	}
	if l.iter == nil && l.iterFile == nil {
		// l.dir >= 0
		return l.Last()
	}
	l.dir = -1

	if l.iter != nil {
		if span := l.iter.Prev(); span != nil {
			return l.verify(span)
		}
	}
	// Either the current file is exhausted or no file iterator is open (for
	// example while positioned on a straddle span); move on to the previous
	// file.
	return l.skipEmptyFileBackward()
}

// skipEmptyFileForward moves to the next file in the forward direction and
// returns the span to surface to the caller. In range key mode it may instead
// return a synthesized straddle span covering the gap before the next file.
// In point key mode it keeps advancing while the loaded file's first span is
// empty. It returns nil when the files are exhausted or an error occurred.
func (l *LevelIter) skipEmptyFileForward() *Span {
	if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
		l.iterFile != nil && l.iter != nil {
		// We were at a file that had spans. Check if the next file that has
		// spans is not directly adjacent to the current file i.e. there is a
		// gap in the span keyspace between the two files. In that case, synthesize
		// a "straddle span" in l.straddle and return that.
		//
		// Straddle spans are not created in rangedel mode.
		if err := l.Close(); err != nil {
			l.err = err
			return l.verify(nil)
		}
		startKey := l.iterFile.LargestRangeKey.UserKey
		// Resetting l.iterFile without loading the file into l.iter is okay and
		// does not change the logic in loadFile() as long as l.iter is also nil;
		// which it should be due to the Close() call above.
		l.iterFile = l.files.Next()
		if l.iterFile == nil {
			return l.verify(nil)
		}
		endKey := l.iterFile.SmallestRangeKey.UserKey
		if l.cmp(startKey, endKey) < 0 {
			// There is a gap between the two files. Synthesize a straddling span
			// to avoid unnecessarily loading the next file.
			l.straddle = Span{
				Start: startKey,
				End:   endKey,
			}
			l.straddleDir = +1
			return l.verify(&l.straddle)
		}
	} else if l.straddleDir < 0 {
		// We were at a straddle key, but are now changing directions. l.iterFile
		// was already moved backward by skipEmptyFileBackward, so advance it
		// forward.
		l.iterFile = l.files.Next()
	}
	l.straddle = Span{}
	l.straddleDir = 0
	// NOTE(review): span is nil on the first iteration, so Span.Empty is
	// presumably safe to call on a nil *Span and reports true — confirm.
	var span *Span
	for span.Empty() {
		fileToLoad := l.iterFile
		if l.keyType == manifest.KeyTypePoint {
			// We haven't iterated to the next file yet if we're in point key
			// (rangedel) mode.
			fileToLoad = l.files.Next()
		}
		if l.loadFile(fileToLoad, +1) == noFileLoaded {
			return l.verify(nil)
		}
		span = l.iter.First()
		// In rangedel mode, we can expect to get empty files that we'd need to
		// skip over, but not in range key mode.
		if l.keyType == manifest.KeyTypeRange {
			break
		}
	}
	return l.verify(span)
}

// skipEmptyFileBackward moves to the next file in the backward direction and
// returns the span to surface to the caller. In range key mode it may instead
// return a synthesized straddle span covering the gap after the previous
// file. In point key mode it keeps retreating while the loaded file's last
// span is empty. It returns nil when the files are exhausted or an error
// occurred.
func (l *LevelIter) skipEmptyFileBackward() *Span {
	// We were at a file that had spans. Check if the previous file that has
	// spans is not directly adjacent to the current file i.e. there is a
	// gap in the span keyspace between the two files. In that case, synthesize
	// a "straddle span" in l.straddle and return that.
	//
	// Straddle spans are not created in rangedel mode.
	if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
		l.iterFile != nil && l.iter != nil {
		if err := l.Close(); err != nil {
			l.err = err
			return l.verify(nil)
		}
		endKey := l.iterFile.SmallestRangeKey.UserKey
		// Resetting l.iterFile without loading the file into l.iter is okay and
		// does not change the logic in loadFile() as long as l.iter is also nil;
		// which it should be due to the Close() call above.
		l.iterFile = l.files.Prev()
		if l.iterFile == nil {
			return l.verify(nil)
		}
		startKey := l.iterFile.LargestRangeKey.UserKey
		if l.cmp(startKey, endKey) < 0 {
			// There is a gap between the two files. Synthesize a straddling span
			// to avoid unnecessarily loading the previous file.
			l.straddle = Span{
				Start: startKey,
				End:   endKey,
			}
			l.straddleDir = -1
			return l.verify(&l.straddle)
		}
	} else if l.straddleDir > 0 {
		// We were at a straddle key, but are now changing directions. l.iterFile
		// was already advanced forward by skipEmptyFileForward, so move it
		// backward.
		l.iterFile = l.files.Prev()
	}
	l.straddle = Span{}
	l.straddleDir = 0
	// NOTE(review): span is nil on the first iteration, so Span.Empty is
	// presumably safe to call on a nil *Span and reports true — confirm.
	var span *Span
	for span.Empty() {
		fileToLoad := l.iterFile
		if l.keyType == manifest.KeyTypePoint {
			// In point key (rangedel) mode the file iterator has not been
			// moved to the previous file yet.
			fileToLoad = l.files.Prev()
		}
		if l.loadFile(fileToLoad, -1) == noFileLoaded {
			return l.verify(nil)
		}
		span = l.iter.Last()
		// In rangedel mode, we can expect to get empty files that we'd need to
		// skip over, but not in range key mode as the filter on the FileMetadata
		// should guarantee we always get a non-empty file.
		if l.keyType == manifest.KeyTypeRange {
			break
		}
	}
	return l.verify(span)
}

// verify is invoked whenever a span is returned from an iterator positioning
// method to a caller. During invariant builds, it asserts invariants to the
// caller: it panics if l.files.Current() and l.iterFile have diverged. The
// span s is returned unchanged.
func (l *LevelIter) verify(s *Span) *Span {
	// NB: Do not add any logic outside the invariants.Enabled conditional to
	// ensure that verify is always compiled away in production builds.
	if invariants.Enabled {
		if f := l.files.Current(); f != l.iterFile {
			panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
				f, l.iterFile))
		}
	}
	return s
}

// Error implements keyspan.FragmentIterator.
499 func (l *LevelIter) Error() error { 500 if l.err != nil || l.iter == nil { 501 return l.err 502 } 503 return l.iter.Error() 504 } 505 506 // Close implements keyspan.FragmentIterator. 507 func (l *LevelIter) Close() error { 508 if l.iter != nil { 509 l.err = l.iter.Close() 510 l.iter = nil 511 } 512 return l.err 513 } 514 515 // String implements keyspan.FragmentIterator. 516 func (l *LevelIter) String() string { 517 if l.iterFile != nil { 518 return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum) 519 } 520 return fmt.Sprintf("%s: fileNum=<nil>", l.level) 521 }