github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/manifest/level_metadata.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package manifest 6 7 import ( 8 "bytes" 9 "fmt" 10 11 "github.com/zuoyebang/bitalostable/internal/base" 12 ) 13 14 // LevelMetadata contains metadata for all of the files within 15 // a level of the LSM. 16 type LevelMetadata struct { 17 level int 18 tree btree 19 } 20 21 // clone makes a copy of the level metadata, implicitly increasing the ref 22 // count of every file contained within lm. 23 func (lm *LevelMetadata) clone() LevelMetadata { 24 return LevelMetadata{ 25 level: lm.level, 26 tree: lm.tree.clone(), 27 } 28 } 29 30 func (lm *LevelMetadata) release() (obsolete []*FileMetadata) { 31 return lm.tree.release() 32 } 33 34 func makeLevelMetadata(cmp Compare, level int, files []*FileMetadata) LevelMetadata { 35 bcmp := btreeCmpSeqNum 36 if level > 0 { 37 bcmp = btreeCmpSmallestKey(cmp) 38 } 39 var lm LevelMetadata 40 lm.level = level 41 lm.tree, _ = makeBTree(bcmp, files) 42 return lm 43 } 44 45 func makeBTree(cmp btreeCmp, files []*FileMetadata) (btree, LevelSlice) { 46 var t btree 47 t.cmp = cmp 48 for _, f := range files { 49 t.insert(f) 50 } 51 return t, LevelSlice{iter: t.iter(), length: t.length} 52 } 53 54 // Empty indicates whether there are any files in the level. 55 func (lm *LevelMetadata) Empty() bool { 56 return lm.tree.length == 0 57 } 58 59 // Len returns the number of files within the level. 60 func (lm *LevelMetadata) Len() int { 61 return lm.tree.length 62 } 63 64 // Iter constructs a LevelIterator over the entire level. 65 func (lm *LevelMetadata) Iter() LevelIterator { 66 return LevelIterator{iter: lm.tree.iter()} 67 } 68 69 // Slice constructs a slice containing the entire level. 70 func (lm *LevelMetadata) Slice() LevelSlice { 71 return LevelSlice{iter: lm.tree.iter(), length: lm.tree.length} 72 } 73 74 // Find finds the provided file in the level if it exists. 75 func (lm *LevelMetadata) Find(cmp base.Compare, m *FileMetadata) *LevelFile { 76 iter := lm.Iter() 77 if lm.level != 0 { 78 // If lm holds files for levels >0, we can narrow our search by binary 79 // searching by bounds. 80 o := overlaps(iter, cmp, m.Smallest.UserKey, 81 m.Largest.UserKey, m.Largest.IsExclusiveSentinel()) 82 iter = o.Iter() 83 } 84 for f := iter.First(); f != nil; f = iter.Next() { 85 if f == m { 86 lf := iter.Take() 87 return &lf 88 } 89 } 90 return nil 91 } 92 93 // Annotation lazily calculates and returns the annotation defined by 94 // Annotator. The Annotator is used as the key for pre-calculated 95 // values, so equal Annotators must be used to avoid duplicate computations 96 // and cached annotations. Annotation must not be called concurrently, and in 97 // practice this is achieved by requiring callers to hold DB.mu. 98 func (lm *LevelMetadata) Annotation(annotator Annotator) interface{} { 99 if lm.Empty() { 100 return annotator.Zero(nil) 101 } 102 v, _ := lm.tree.root.annotation(annotator) 103 return v 104 } 105 106 // InvalidateAnnotation clears any cached annotations defined by Annotator. The 107 // Annotator is used as the key for pre-calculated values, so equal Annotators 108 // must be used to clear the appropriate cached annotation. InvalidateAnnotation 109 // must not be called concurrently, and in practice this is achieved by 110 // requiring callers to hold DB.mu. 111 func (lm *LevelMetadata) InvalidateAnnotation(annotator Annotator) { 112 if lm.Empty() { 113 return 114 } 115 lm.tree.root.invalidateAnnotation(annotator) 116 } 117 118 // LevelFile holds a file's metadata along with its position 119 // within a level of the LSM. 120 type LevelFile struct { 121 *FileMetadata 122 slice LevelSlice 123 } 124 125 // Slice constructs a LevelSlice containing only this file. 126 func (lf LevelFile) Slice() LevelSlice { 127 return lf.slice 128 } 129 130 // NewLevelSliceSeqSorted constructs a LevelSlice over the provided files, 131 // sorted by the L0 sequence number sort order. 132 // TODO(jackson): Can we improve this interface or avoid needing to export 133 // a slice constructor like this? 134 func NewLevelSliceSeqSorted(files []*FileMetadata) LevelSlice { 135 tr, slice := makeBTree(btreeCmpSeqNum, files) 136 tr.release() 137 return slice 138 } 139 140 // NewLevelSliceKeySorted constructs a LevelSlice over the provided files, 141 // sorted by the files smallest keys. 142 // TODO(jackson): Can we improve this interface or avoid needing to export 143 // a slice constructor like this? 144 func NewLevelSliceKeySorted(cmp base.Compare, files []*FileMetadata) LevelSlice { 145 tr, slice := makeBTree(btreeCmpSmallestKey(cmp), files) 146 tr.release() 147 return slice 148 } 149 150 // NewLevelSliceSpecificOrder constructs a LevelSlice over the provided files, 151 // ordering the files by their order in the provided slice. It's used in 152 // tests. 153 // TODO(jackson): Update tests to avoid requiring this and remove it. 154 func NewLevelSliceSpecificOrder(files []*FileMetadata) LevelSlice { 155 tr, slice := makeBTree(btreeCmpSpecificOrder(files), files) 156 tr.release() 157 return slice 158 } 159 160 // LevelSlice contains a slice of the files within a level of the LSM. 161 // A LevelSlice is immutable once created, but may be used to construct a 162 // mutable LevelIterator over the slice's files. 163 type LevelSlice struct { 164 iter iterator 165 length int 166 // start and end form the inclusive bounds of a slice of files within a 167 // level of the LSM. They may be nil if the entire B-Tree backing iter is 168 // accessible. 169 start *iterator 170 end *iterator 171 } 172 173 // Each invokes fn for each element in the slice. 174 func (ls LevelSlice) Each(fn func(*FileMetadata)) { 175 iter := ls.Iter() 176 for f := iter.First(); f != nil; f = iter.Next() { 177 fn(f) 178 } 179 } 180 181 // String implements fmt.Stringer. 182 func (ls LevelSlice) String() string { 183 var buf bytes.Buffer 184 ls.Each(func(f *FileMetadata) { 185 if buf.Len() > 0 { 186 fmt.Fprintf(&buf, " ") 187 } 188 fmt.Fprint(&buf, f) 189 }) 190 return buf.String() 191 } 192 193 // Empty indicates whether the slice contains any files. 194 func (ls *LevelSlice) Empty() bool { 195 return emptyWithBounds(ls.iter, ls.start, ls.end) 196 } 197 198 // Iter constructs a LevelIterator that iterates over the slice. 199 func (ls *LevelSlice) Iter() LevelIterator { 200 return LevelIterator{ 201 start: ls.start, 202 end: ls.end, 203 iter: ls.iter.clone(), 204 } 205 } 206 207 // Len returns the number of files in the slice. Its runtime is constant. 208 func (ls *LevelSlice) Len() int { 209 return ls.length 210 } 211 212 // SizeSum sums the size of all files in the slice. Its runtime is linear in 213 // the length of the slice. 214 func (ls *LevelSlice) SizeSum() uint64 { 215 var sum uint64 216 iter := ls.Iter() 217 for f := iter.First(); f != nil; f = iter.Next() { 218 sum += f.Size 219 } 220 return sum 221 } 222 223 // Reslice constructs a new slice backed by the same underlying level, with 224 // new start and end positions. Reslice invokes the provided function, passing 225 // two LevelIterators: one positioned to i's inclusive start and one 226 // positioned to i's inclusive end. The resliceFunc may move either iterator 227 // forward or backwards, including beyond the callee's original bounds to 228 // capture additional files from the underlying level. Reslice constructs and 229 // returns a new LevelSlice with the final bounds of the iterators after 230 // calling resliceFunc. 231 func (ls LevelSlice) Reslice(resliceFunc func(start, end *LevelIterator)) LevelSlice { 232 if ls.iter.r == nil { 233 return ls 234 } 235 var start, end LevelIterator 236 if ls.start == nil { 237 start.iter = ls.iter.clone() 238 start.iter.first() 239 } else { 240 start.iter = ls.start.clone() 241 } 242 if ls.end == nil { 243 end.iter = ls.iter.clone() 244 end.iter.last() 245 } else { 246 end.iter = ls.end.clone() 247 } 248 resliceFunc(&start, &end) 249 250 s := LevelSlice{ 251 iter: start.iter.clone(), 252 start: &start.iter, 253 end: &end.iter, 254 } 255 // Calculate the new slice's length. 256 iter := s.Iter() 257 for f := iter.First(); f != nil; f = iter.Next() { 258 s.length++ 259 } 260 return s 261 } 262 263 // KeyType is used to specify the type of keys we're looking for in 264 // LevelIterator positioning operations. Files not containing any keys of the 265 // desired type are skipped. 266 type KeyType int8 267 268 const ( 269 // KeyTypePointAndRange denotes a search among the entire keyspace, including 270 // both point keys and range keys. No sstables are skipped. 271 KeyTypePointAndRange KeyType = iota 272 // KeyTypePoint denotes a search among the point keyspace. SSTables with no 273 // point keys will be skipped. Note that the point keyspace includes rangedels. 274 KeyTypePoint 275 // KeyTypeRange denotes a search among the range keyspace. SSTables with no 276 // range keys will be skipped. 277 KeyTypeRange 278 ) 279 280 type keyTypeAnnotator struct{} 281 282 var _ Annotator = keyTypeAnnotator{} 283 284 func (k keyTypeAnnotator) Zero(dst interface{}) interface{} { 285 var val *KeyType 286 if dst != nil { 287 val = dst.(*KeyType) 288 } else { 289 val = new(KeyType) 290 } 291 *val = KeyTypePoint 292 return val 293 } 294 295 func (k keyTypeAnnotator) Accumulate(m *FileMetadata, dst interface{}) (interface{}, bool) { 296 v := dst.(*KeyType) 297 switch *v { 298 case KeyTypePoint: 299 if m.HasRangeKeys { 300 *v = KeyTypePointAndRange 301 } 302 case KeyTypePointAndRange: 303 // Do nothing. 304 default: 305 panic("unexpected key type") 306 } 307 return v, true 308 } 309 310 func (k keyTypeAnnotator) Merge(src interface{}, dst interface{}) interface{} { 311 v := dst.(*KeyType) 312 srcVal := src.(*KeyType) 313 switch *v { 314 case KeyTypePoint: 315 if *srcVal == KeyTypePointAndRange { 316 *v = KeyTypePointAndRange 317 } 318 case KeyTypePointAndRange: 319 // Do nothing. 320 default: 321 panic("unexpected key type") 322 } 323 return v 324 } 325 326 // LevelIterator iterates over a set of files' metadata. Its zero value is an 327 // empty iterator. 328 type LevelIterator struct { 329 iter iterator 330 start *iterator 331 end *iterator 332 filter KeyType 333 } 334 335 func (i LevelIterator) String() string { 336 var buf bytes.Buffer 337 iter := i.iter.clone() 338 iter.first() 339 iter.prev() 340 if i.iter.pos == -1 { 341 fmt.Fprint(&buf, "(<start>)*") 342 } 343 iter.next() 344 for ; iter.valid(); iter.next() { 345 if buf.Len() > 0 { 346 fmt.Fprint(&buf, " ") 347 } 348 349 if i.start != nil && cmpIter(iter, *i.start) == 0 { 350 fmt.Fprintf(&buf, " [ ") 351 } 352 isCurrentPos := cmpIter(iter, i.iter) == 0 353 if isCurrentPos { 354 fmt.Fprint(&buf, " ( ") 355 } 356 fmt.Fprint(&buf, iter.cur().String()) 357 if isCurrentPos { 358 fmt.Fprint(&buf, " )*") 359 } 360 if i.end != nil && cmpIter(iter, *i.end) == 0 { 361 fmt.Fprintf(&buf, " ]") 362 } 363 } 364 if i.iter.n != nil && i.iter.pos >= i.iter.n.count { 365 if buf.Len() > 0 { 366 fmt.Fprint(&buf, " ") 367 } 368 fmt.Fprint(&buf, "(<end>)*") 369 } 370 return buf.String() 371 } 372 373 // Clone copies the iterator, returning an independent iterator at the same 374 // position. 375 func (i *LevelIterator) Clone() LevelIterator { 376 if i.iter.r == nil { 377 return *i 378 } 379 // The start and end iterators are not cloned and are treated as 380 // immutable. 381 return LevelIterator{ 382 iter: i.iter.clone(), 383 start: i.start, 384 end: i.end, 385 filter: i.filter, 386 } 387 } 388 389 // Current returns the item at the current iterator position. 390 func (i *LevelIterator) Current() *FileMetadata { 391 if !i.iter.valid() { 392 return nil 393 } 394 return i.iter.cur() 395 } 396 397 func (i *LevelIterator) empty() bool { 398 return emptyWithBounds(i.iter, i.start, i.end) 399 } 400 401 // Filter clones the iterator and sets the desired KeyType as the key to filter 402 // files on. 403 func (i *LevelIterator) Filter(keyType KeyType) LevelIterator { 404 l := i.Clone() 405 l.filter = keyType 406 return l 407 } 408 409 func emptyWithBounds(i iterator, start, end *iterator) bool { 410 // If i.r is nil, the iterator was constructed from an empty btree. 411 // If the end bound is before the start bound, the bounds represent an 412 // empty slice of the B-Tree. 413 return i.r == nil || (start != nil && end != nil && cmpIter(*end, *start) < 0) 414 } 415 416 // First seeks to the first file in the iterator and returns it. 417 func (i *LevelIterator) First() *FileMetadata { 418 if i.empty() { 419 return nil 420 } 421 if i.start != nil { 422 i.iter = i.start.clone() 423 } else { 424 i.iter.first() 425 } 426 if !i.iter.valid() { 427 return nil 428 } 429 return i.filteredNextFile(i.iter.cur()) 430 } 431 432 // Last seeks to the last file in the iterator and returns it. 433 func (i *LevelIterator) Last() *FileMetadata { 434 if i.empty() { 435 return nil 436 } 437 if i.end != nil { 438 i.iter = i.end.clone() 439 } else { 440 i.iter.last() 441 } 442 if !i.iter.valid() { 443 return nil 444 } 445 return i.filteredPrevFile(i.iter.cur()) 446 } 447 448 // Next advances the iterator to the next file and returns it. 449 func (i *LevelIterator) Next() *FileMetadata { 450 i.iter.next() 451 if !i.iter.valid() { 452 return nil 453 } 454 if i.end != nil && cmpIter(i.iter, *i.end) > 0 { 455 return nil 456 } 457 return i.filteredNextFile(i.iter.cur()) 458 } 459 460 // Prev moves the iterator the previous file and returns it. 461 func (i *LevelIterator) Prev() *FileMetadata { 462 i.iter.prev() 463 if !i.iter.valid() { 464 return nil 465 } 466 if i.start != nil && cmpIter(i.iter, *i.start) < 0 { 467 return nil 468 } 469 return i.filteredPrevFile(i.iter.cur()) 470 } 471 472 // SeekGE seeks to the first file in the iterator's file set with a largest 473 // user key greater than or equal to the provided user key. The iterator must 474 // have been constructed from L1+, because it requires the underlying files to 475 // be sorted by user keys and non-overlapping. 476 func (i *LevelIterator) SeekGE(cmp Compare, userKey []byte) *FileMetadata { 477 // TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey. 478 if i.empty() { 479 return nil 480 } 481 meta := i.seek(func(m *FileMetadata) bool { 482 return cmp(m.Largest.UserKey, userKey) >= 0 483 }) 484 for meta != nil { 485 switch i.filter { 486 case KeyTypePointAndRange: 487 return meta 488 case KeyTypePoint: 489 if meta.HasPointKeys && cmp(meta.LargestPointKey.UserKey, userKey) >= 0 { 490 return meta 491 } 492 case KeyTypeRange: 493 if meta.HasRangeKeys && cmp(meta.LargestRangeKey.UserKey, userKey) >= 0 { 494 return meta 495 } 496 } 497 meta = i.Next() 498 } 499 return i.filteredNextFile(meta) 500 } 501 502 // SeekLT seeks to the last file in the iterator's file set with a smallest 503 // user key less than the provided user key. The iterator must have been 504 // constructed from L1+, because it requires the underlying files to be sorted 505 // by user keys and non-overlapping. 506 func (i *LevelIterator) SeekLT(cmp Compare, userKey []byte) *FileMetadata { 507 // TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey. 508 if i.empty() { 509 return nil 510 } 511 i.seek(func(m *FileMetadata) bool { 512 return cmp(m.Smallest.UserKey, userKey) >= 0 513 }) 514 meta := i.Prev() 515 for meta != nil { 516 switch i.filter { 517 case KeyTypePointAndRange: 518 return meta 519 case KeyTypePoint: 520 if meta.HasPointKeys && cmp(meta.SmallestPointKey.UserKey, userKey) < 0 { 521 return meta 522 } 523 case KeyTypeRange: 524 if meta.HasRangeKeys && cmp(meta.SmallestRangeKey.UserKey, userKey) < 0 { 525 return meta 526 } 527 } 528 meta = i.Prev() 529 } 530 return i.filteredPrevFile(meta) 531 } 532 533 func (i *LevelIterator) filteredNextFile(meta *FileMetadata) *FileMetadata { 534 switch i.filter { 535 case KeyTypePoint: 536 for meta != nil && !meta.HasPointKeys { 537 meta = i.Next() 538 } 539 return meta 540 case KeyTypeRange: 541 // TODO(bilal): Range keys are expected to be rare and sparse. Add an 542 // optimization to annotate the tree and efficiently skip over files that 543 // do not contain range keys right at the seek step, to reduce iterations 544 // here. 545 for meta != nil && !meta.HasRangeKeys { 546 meta = i.Next() 547 } 548 return meta 549 default: 550 return meta 551 } 552 } 553 554 func (i *LevelIterator) filteredPrevFile(meta *FileMetadata) *FileMetadata { 555 switch i.filter { 556 case KeyTypePoint: 557 for meta != nil && !meta.HasPointKeys { 558 meta = i.Prev() 559 } 560 return meta 561 case KeyTypeRange: 562 // TODO(bilal): Range keys are expected to be rare and sparse. Add an 563 // optimization to annotate the tree and efficiently skip over files that 564 // do not contain range keys right at the seek step, to reduce iterations 565 // here. 566 for meta != nil && !meta.HasRangeKeys { 567 meta = i.Prev() 568 } 569 return meta 570 default: 571 return meta 572 } 573 } 574 575 func (i *LevelIterator) seek(fn func(*FileMetadata) bool) *FileMetadata { 576 i.iter.seek(fn) 577 578 // i.iter.seek seeked in the unbounded underlying B-Tree. If the iterator 579 // has start or end bounds, we may have exceeded them. Reset to the bounds 580 // if necessary. 581 // 582 // NB: The LevelIterator and LevelSlice semantics require that a bounded 583 // LevelIterator/LevelSlice containing files x0, x1, ..., xn behave 584 // identically to an unbounded LevelIterator/LevelSlice of a B-Tree 585 // containing x0, x1, ..., xn. In other words, any files outside the 586 // LevelIterator's bounds should not influence the iterator's behavior. 587 // When seeking, this means a SeekGE that seeks beyond the end bound, 588 // followed by a Prev should return the last element within bounds. 589 if i.end != nil && cmpIter(i.iter, *i.end) > 0 { 590 i.iter = i.end.clone() 591 // Since seek(fn) positioned beyond i.end, we know there is nothing to 592 // return within bounds. 593 i.iter.next() 594 return nil 595 } else if i.start != nil && cmpIter(i.iter, *i.start) < 0 { 596 i.iter = i.start.clone() 597 return i.iter.cur() 598 } 599 if !i.iter.valid() { 600 return nil 601 } 602 return i.iter.cur() 603 } 604 605 // Take constructs a LevelFile containing the file at the iterator's current 606 // position. Take panics if the iterator is not currently positioned over a 607 // file. 608 func (i *LevelIterator) Take() LevelFile { 609 m := i.Current() 610 if m == nil { 611 panic("Take called on invalid LevelIterator") 612 } 613 // LevelSlice's start and end fields are immutable and are positioned to 614 // the same position for a LevelFile because they're inclusive, so we can 615 // share one iterator stack between the two bounds. 616 boundsIter := i.iter.clone() 617 return LevelFile{ 618 FileMetadata: m, 619 slice: LevelSlice{ 620 iter: i.iter.clone(), 621 start: &boundsIter, 622 end: &boundsIter, 623 length: 1, 624 }, 625 } 626 }