github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/manifest/btree.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package manifest 6 7 import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "sync/atomic" 12 "unsafe" 13 14 "github.com/cockroachdb/errors" 15 ) 16 17 // The Annotator type defined below is used by other packages to lazily 18 // compute a value over a B-Tree. Each node of the B-Tree stores one 19 // `annotation` per annotator, containing the result of the computation over 20 // the node's subtree. 21 // 22 // An annotation is marked as valid if it's current with the current subtree 23 // state. Annotations are marked as invalid whenever a node will be mutated 24 // (in mut). Annotators may also return `false` from `Accumulate` to signal 25 // that a computation for a file is not stable and may change in the future. 26 // Annotations that include these unstable values are also marked as invalid 27 // on the node, ensuring that future queries for the annotation will recompute 28 // the value. 29 30 // An Annotator defines a computation over a level's FileMetadata. If the 31 // computation is stable and uses inputs that are fixed for the lifetime of 32 // a FileMetadata, the LevelMetadata's internal data structures are annotated 33 // with the intermediary computations. This allows the computation to be 34 // computed incrementally as edits are applied to a level. 35 type Annotator interface { 36 // Zero returns the zero value of an annotation. This value is returned 37 // when a LevelMetadata is empty. The dst argument, if non-nil, is an 38 // obsolete value previously returned by this Annotator and may be 39 // overwritten and reused to avoid a memory allocation. 40 Zero(dst interface{}) (v interface{}) 41 42 // Accumulate computes the annotation for a single file in a level's 43 // metadata. It merges the file's value into dst and returns a bool flag 44 // indicating whether or not the value is stable and okay to cache as an 45 // annotation. If the file's value may change over the life of the file, 46 // the annotator must return false. 47 // 48 // Implementations may modify dst and return it to avoid an allocation. 49 Accumulate(m *FileMetadata, dst interface{}) (v interface{}, cacheOK bool) 50 51 // Merge combines two values src and dst, returning the result. 52 // Implementations may modify dst and return it to avoid an allocation. 53 Merge(src interface{}, dst interface{}) interface{} 54 } 55 56 type btreeCmp func(*FileMetadata, *FileMetadata) int 57 58 func btreeCmpSeqNum(a, b *FileMetadata) int { 59 return a.cmpSeqNum(b) 60 } 61 62 func btreeCmpSmallestKey(cmp Compare) btreeCmp { 63 return func(a, b *FileMetadata) int { 64 return a.cmpSmallestKey(b, cmp) 65 } 66 } 67 68 // btreeCmpSpecificOrder is used in tests to construct a B-Tree with a 69 // specific ordering of FileMetadata within the tree. It's typically used to 70 // test consistency checking code that needs to construct a malformed B-Tree. 71 func btreeCmpSpecificOrder(files []*FileMetadata) btreeCmp { 72 m := map[*FileMetadata]int{} 73 for i, f := range files { 74 m[f] = i 75 } 76 return func(a, b *FileMetadata) int { 77 ai, aok := m[a] 78 bi, bok := m[b] 79 if !aok || !bok { 80 panic("btreeCmpSliceOrder called with unknown files") 81 } 82 switch { 83 case ai < bi: 84 return -1 85 case ai > bi: 86 return +1 87 default: 88 return 0 89 } 90 } 91 } 92 93 const ( 94 degree = 16 95 maxItems = 2*degree - 1 96 minItems = degree - 1 97 ) 98 99 type annotation struct { 100 annotator Annotator 101 // v is an annotation value, the output of either 102 // annotator.Value or annotator.Merge. 103 v interface{} 104 // valid indicates whether future reads of the annotation may use v as-is. 105 // If false, v will be zeroed and recalculated. 106 valid bool 107 } 108 109 type leafNode struct { 110 ref int32 111 count int16 112 leaf bool 113 items [maxItems]*FileMetadata 114 // annot contains one annotation per annotator, merged over the entire 115 // node's files (and all descendants for non-leaf nodes). 116 annot []annotation 117 } 118 119 type node struct { 120 leafNode 121 children [maxItems + 1]*node 122 } 123 124 //go:nocheckptr casts a ptr to a smaller struct to a ptr to a larger struct. 125 func leafToNode(ln *leafNode) *node { 126 return (*node)(unsafe.Pointer(ln)) 127 } 128 129 func newLeafNode() *node { 130 n := leafToNode(new(leafNode)) 131 n.leaf = true 132 n.ref = 1 133 return n 134 } 135 136 func newNode() *node { 137 n := new(node) 138 n.ref = 1 139 return n 140 } 141 142 // mut creates and returns a mutable node reference. If the node is not shared 143 // with any other trees then it can be modified in place. Otherwise, it must be 144 // cloned to ensure unique ownership. In this way, we enforce a copy-on-write 145 // policy which transparently incorporates the idea of local mutations, like 146 // Clojure's transients or Haskell's ST monad, where nodes are only copied 147 // during the first time that they are modified between Clone operations. 148 // 149 // When a node is cloned, the provided pointer will be redirected to the new 150 // mutable node. 151 func mut(n **node) *node { 152 if atomic.LoadInt32(&(*n).ref) == 1 { 153 // Exclusive ownership. Can mutate in place. 154 155 // Whenever a node will be mutated, reset its annotations to be marked 156 // as uncached. This ensures any future calls to (*node).annotation 157 // will recompute annotations on the modified subtree. 158 for i := range (*n).annot { 159 (*n).annot[i].valid = false 160 } 161 return *n 162 } 163 // If we do not have unique ownership over the node then we 164 // clone it to gain unique ownership. After doing so, we can 165 // release our reference to the old node. We pass recursive 166 // as true because even though we just observed the node's 167 // reference count to be greater than 1, we might be racing 168 // with another call to decRef on this node. 169 c := (*n).clone() 170 (*n).decRef(true /* recursive */, nil) 171 *n = c 172 // NB: We don't need to clear annotations, because (*node).clone does not 173 // copy them. 174 return *n 175 } 176 177 // incRef acquires a reference to the node. 178 func (n *node) incRef() { 179 atomic.AddInt32(&n.ref, 1) 180 } 181 182 // decRef releases a reference to the node. If requested, the method 183 // will recurse into child nodes and decrease their refcounts as well. 184 // When a node is released, its contained files are dereferenced. 185 func (n *node) decRef(recursive bool, obsolete *[]*FileMetadata) { 186 if atomic.AddInt32(&n.ref, -1) > 0 { 187 // Other references remain. Can't free. 188 return 189 } 190 191 // Dereference the node's metadata and release child references. 192 if recursive { 193 for _, f := range n.items[:n.count] { 194 if atomic.AddInt32(&f.refs, -1) == 0 { 195 // There are two sources of node dereferences: tree mutations 196 // and Version dereferences. Files should only be made obsolete 197 // during Version dereferences, during which `obsolete` will be 198 // non-nil. 199 if obsolete == nil { 200 panic(fmt.Sprintf("file metadata %s dereferenced to zero during tree mutation", f.FileNum)) 201 } 202 *obsolete = append(*obsolete, f) 203 } 204 } 205 if !n.leaf { 206 for i := int16(0); i <= n.count; i++ { 207 n.children[i].decRef(true /* recursive */, obsolete) 208 } 209 } 210 } 211 } 212 213 // clone creates a clone of the receiver with a single reference count. 214 func (n *node) clone() *node { 215 var c *node 216 if n.leaf { 217 c = newLeafNode() 218 } else { 219 c = newNode() 220 } 221 // NB: copy field-by-field without touching n.ref to avoid 222 // triggering the race detector and looking like a data race. 223 c.count = n.count 224 c.items = n.items 225 // Increase the refcount of each contained item. 226 for _, f := range n.items[:n.count] { 227 atomic.AddInt32(&f.refs, 1) 228 } 229 if !c.leaf { 230 // Copy children and increase each refcount. 231 c.children = n.children 232 for i := int16(0); i <= c.count; i++ { 233 c.children[i].incRef() 234 } 235 } 236 return c 237 } 238 239 func (n *node) insertAt(index int, item *FileMetadata, nd *node) { 240 if index < int(n.count) { 241 copy(n.items[index+1:n.count+1], n.items[index:n.count]) 242 if !n.leaf { 243 copy(n.children[index+2:n.count+2], n.children[index+1:n.count+1]) 244 } 245 } 246 n.items[index] = item 247 if !n.leaf { 248 n.children[index+1] = nd 249 } 250 n.count++ 251 } 252 253 func (n *node) pushBack(item *FileMetadata, nd *node) { 254 n.items[n.count] = item 255 if !n.leaf { 256 n.children[n.count+1] = nd 257 } 258 n.count++ 259 } 260 261 func (n *node) pushFront(item *FileMetadata, nd *node) { 262 if !n.leaf { 263 copy(n.children[1:n.count+2], n.children[:n.count+1]) 264 n.children[0] = nd 265 } 266 copy(n.items[1:n.count+1], n.items[:n.count]) 267 n.items[0] = item 268 n.count++ 269 } 270 271 // removeAt removes a value at a given index, pulling all subsequent values 272 // back. 273 func (n *node) removeAt(index int) (*FileMetadata, *node) { 274 var child *node 275 if !n.leaf { 276 child = n.children[index+1] 277 copy(n.children[index+1:n.count], n.children[index+2:n.count+1]) 278 n.children[n.count] = nil 279 } 280 n.count-- 281 out := n.items[index] 282 copy(n.items[index:n.count], n.items[index+1:n.count+1]) 283 n.items[n.count] = nil 284 return out, child 285 } 286 287 // popBack removes and returns the last element in the list. 288 func (n *node) popBack() (*FileMetadata, *node) { 289 n.count-- 290 out := n.items[n.count] 291 n.items[n.count] = nil 292 if n.leaf { 293 return out, nil 294 } 295 child := n.children[n.count+1] 296 n.children[n.count+1] = nil 297 return out, child 298 } 299 300 // popFront removes and returns the first element in the list. 301 func (n *node) popFront() (*FileMetadata, *node) { 302 n.count-- 303 var child *node 304 if !n.leaf { 305 child = n.children[0] 306 copy(n.children[:n.count+1], n.children[1:n.count+2]) 307 n.children[n.count+1] = nil 308 } 309 out := n.items[0] 310 copy(n.items[:n.count], n.items[1:n.count+1]) 311 n.items[n.count] = nil 312 return out, child 313 } 314 315 // find returns the index where the given item should be inserted into this 316 // list. 'found' is true if the item already exists in the list at the given 317 // index. 318 func (n *node) find(cmp btreeCmp, item *FileMetadata) (index int, found bool) { 319 // Logic copied from sort.Search. Inlining this gave 320 // an 11% speedup on BenchmarkBTreeDeleteInsert. 321 i, j := 0, int(n.count) 322 for i < j { 323 h := int(uint(i+j) >> 1) // avoid overflow when computing h 324 // i ≤ h < j 325 v := cmp(item, n.items[h]) 326 if v == 0 { 327 return h, true 328 } else if v > 0 { 329 i = h + 1 330 } else { 331 j = h 332 } 333 } 334 return i, false 335 } 336 337 // split splits the given node at the given index. The current node shrinks, 338 // and this function returns the item that existed at that index and a new 339 // node containing all items/children after it. 340 // 341 // Before: 342 // 343 // +-----------+ 344 // | x y z | 345 // +--/-/-\-\--+ 346 // 347 // After: 348 // 349 // +-----------+ 350 // | y | 351 // +----/-\----+ 352 // / \ 353 // v v 354 // 355 // +-----------+ +-----------+ 356 // | x | | z | 357 // +-----------+ +-----------+ 358 func (n *node) split(i int) (*FileMetadata, *node) { 359 out := n.items[i] 360 var next *node 361 if n.leaf { 362 next = newLeafNode() 363 } else { 364 next = newNode() 365 } 366 next.count = n.count - int16(i+1) 367 copy(next.items[:], n.items[i+1:n.count]) 368 for j := int16(i); j < n.count; j++ { 369 n.items[j] = nil 370 } 371 if !n.leaf { 372 copy(next.children[:], n.children[i+1:n.count+1]) 373 for j := int16(i + 1); j <= n.count; j++ { 374 n.children[j] = nil 375 } 376 } 377 n.count = int16(i) 378 return out, next 379 } 380 381 // insert inserts a item into the subtree rooted at this node, making sure no 382 // nodes in the subtree exceed maxItems items. 383 func (n *node) insert(cmp btreeCmp, item *FileMetadata) error { 384 i, found := n.find(cmp, item) 385 if found { 386 // cmp provides a total ordering of the files within a level. 387 // If we're inserting a metadata that's equal to an existing item 388 // in the tree, we're inserting a file into a level twice. 389 return errors.Errorf("files %s and %s collided on sort keys", 390 errors.Safe(item.FileNum), errors.Safe(n.items[i].FileNum)) 391 } 392 if n.leaf { 393 n.insertAt(i, item, nil) 394 return nil 395 } 396 if n.children[i].count >= maxItems { 397 splitLa, splitNode := mut(&n.children[i]).split(maxItems / 2) 398 n.insertAt(i, splitLa, splitNode) 399 400 switch cmp := cmp(item, n.items[i]); { 401 case cmp < 0: 402 // no change, we want first split node 403 case cmp > 0: 404 i++ // we want second split node 405 default: 406 // cmp provides a total ordering of the files within a level. 407 // If we're inserting a metadata that's equal to an existing item 408 // in the tree, we're inserting a file into a level twice. 409 return errors.Errorf("files %s and %s collided on sort keys", 410 errors.Safe(item.FileNum), errors.Safe(n.items[i].FileNum)) 411 } 412 } 413 return mut(&n.children[i]).insert(cmp, item) 414 } 415 416 // removeMax removes and returns the maximum item from the subtree rooted 417 // at this node. 418 func (n *node) removeMax() *FileMetadata { 419 if n.leaf { 420 n.count-- 421 out := n.items[n.count] 422 n.items[n.count] = nil 423 return out 424 } 425 child := mut(&n.children[n.count]) 426 if child.count <= minItems { 427 n.rebalanceOrMerge(int(n.count)) 428 return n.removeMax() 429 } 430 return child.removeMax() 431 } 432 433 // remove removes a item from the subtree rooted at this node. Returns 434 // the item that was removed or nil if no matching item was found. 435 func (n *node) remove(cmp btreeCmp, item *FileMetadata) (out *FileMetadata) { 436 i, found := n.find(cmp, item) 437 if n.leaf { 438 if found { 439 out, _ = n.removeAt(i) 440 return out 441 } 442 return nil 443 } 444 if n.children[i].count <= minItems { 445 // Child not large enough to remove from. 446 n.rebalanceOrMerge(i) 447 return n.remove(cmp, item) 448 } 449 child := mut(&n.children[i]) 450 if found { 451 // Replace the item being removed with the max item in our left child. 452 out = n.items[i] 453 n.items[i] = child.removeMax() 454 return out 455 } 456 // Latch is not in this node and child is large enough to remove from. 457 out = child.remove(cmp, item) 458 return out 459 } 460 461 // rebalanceOrMerge grows child 'i' to ensure it has sufficient room to remove 462 // a item from it while keeping it at or above minItems. 463 func (n *node) rebalanceOrMerge(i int) { 464 switch { 465 case i > 0 && n.children[i-1].count > minItems: 466 // Rebalance from left sibling. 467 // 468 // +-----------+ 469 // | y | 470 // +----/-\----+ 471 // / \ 472 // v v 473 // +-----------+ +-----------+ 474 // | x | | | 475 // +----------\+ +-----------+ 476 // \ 477 // v 478 // a 479 // 480 // After: 481 // 482 // +-----------+ 483 // | x | 484 // +----/-\----+ 485 // / \ 486 // v v 487 // +-----------+ +-----------+ 488 // | | | y | 489 // +-----------+ +/----------+ 490 // / 491 // v 492 // a 493 // 494 left := mut(&n.children[i-1]) 495 child := mut(&n.children[i]) 496 xLa, grandChild := left.popBack() 497 yLa := n.items[i-1] 498 child.pushFront(yLa, grandChild) 499 n.items[i-1] = xLa 500 501 case i < int(n.count) && n.children[i+1].count > minItems: 502 // Rebalance from right sibling. 503 // 504 // +-----------+ 505 // | y | 506 // +----/-\----+ 507 // / \ 508 // v v 509 // +-----------+ +-----------+ 510 // | | | x | 511 // +-----------+ +/----------+ 512 // / 513 // v 514 // a 515 // 516 // After: 517 // 518 // +-----------+ 519 // | x | 520 // +----/-\----+ 521 // / \ 522 // v v 523 // +-----------+ +-----------+ 524 // | y | | | 525 // +----------\+ +-----------+ 526 // \ 527 // v 528 // a 529 // 530 right := mut(&n.children[i+1]) 531 child := mut(&n.children[i]) 532 xLa, grandChild := right.popFront() 533 yLa := n.items[i] 534 child.pushBack(yLa, grandChild) 535 n.items[i] = xLa 536 537 default: 538 // Merge with either the left or right sibling. 539 // 540 // +-----------+ 541 // | u y v | 542 // +----/-\----+ 543 // / \ 544 // v v 545 // +-----------+ +-----------+ 546 // | x | | z | 547 // +-----------+ +-----------+ 548 // 549 // After: 550 // 551 // +-----------+ 552 // | u v | 553 // +-----|-----+ 554 // | 555 // v 556 // +-----------+ 557 // | x y z | 558 // +-----------+ 559 // 560 if i >= int(n.count) { 561 i = int(n.count - 1) 562 } 563 child := mut(&n.children[i]) 564 // Make mergeChild mutable, bumping the refcounts on its children if necessary. 565 _ = mut(&n.children[i+1]) 566 mergeLa, mergeChild := n.removeAt(i) 567 child.items[child.count] = mergeLa 568 copy(child.items[child.count+1:], mergeChild.items[:mergeChild.count]) 569 if !child.leaf { 570 copy(child.children[child.count+1:], mergeChild.children[:mergeChild.count+1]) 571 } 572 child.count += mergeChild.count + 1 573 574 mergeChild.decRef(false /* recursive */, nil) 575 } 576 } 577 578 func (n *node) invalidateAnnotation(a Annotator) { 579 // Find this annotator's annotation on this node. 580 var annot *annotation 581 for i := range n.annot { 582 if n.annot[i].annotator == a { 583 annot = &n.annot[i] 584 } 585 } 586 587 if annot != nil && annot.valid { 588 annot.valid = false 589 annot.v = a.Zero(annot.v) 590 } 591 if !n.leaf { 592 for i := int16(0); i <= n.count; i++ { 593 n.children[i].invalidateAnnotation(a) 594 } 595 } 596 } 597 598 func (n *node) annotation(a Annotator) (interface{}, bool) { 599 // Find this annotator's annotation on this node. 600 var annot *annotation 601 for i := range n.annot { 602 if n.annot[i].annotator == a { 603 annot = &n.annot[i] 604 } 605 } 606 607 // If it exists and is marked as valid, we can return it without 608 // recomputing anything. 609 if annot != nil && annot.valid { 610 return annot.v, true 611 } 612 613 if annot == nil { 614 // This is n's first time being annotated by a. 615 // Create a new zeroed annotation. 616 n.annot = append(n.annot, annotation{ 617 annotator: a, 618 v: a.Zero(nil), 619 }) 620 annot = &n.annot[len(n.annot)-1] 621 } else { 622 // There's an existing annotation that must be recomputed. 623 // Zero its value. 624 annot.v = a.Zero(annot.v) 625 } 626 627 annot.valid = true 628 for i := int16(0); i <= n.count; i++ { 629 if !n.leaf { 630 v, ok := n.children[i].annotation(a) 631 annot.v = a.Merge(v, annot.v) 632 annot.valid = annot.valid && ok 633 } 634 if i < n.count { 635 v, ok := a.Accumulate(n.items[i], annot.v) 636 annot.v = v 637 annot.valid = annot.valid && ok 638 } 639 } 640 return annot.v, annot.valid 641 } 642 643 // btree is an implementation of a B-Tree. 644 // 645 // btree stores FileMetadata in an ordered structure, allowing easy insertion, 646 // removal, and iteration. The B-Tree stores items in order based on cmp. The 647 // first level of the LSM uses a cmp function that compares sequence numbers. 648 // All other levels compare using the FileMetadata.Smallest. 649 // 650 // Write operations are not safe for concurrent mutation by multiple 651 // goroutines, but Read operations are. 652 type btree struct { 653 root *node 654 length int 655 cmp btreeCmp 656 } 657 658 // release dereferences and clears the root node of the btree, removing all 659 // items from the btree. In doing so, it decrements contained file counts. 660 // It returns a slice of newly obsolete files, if any. 661 func (t *btree) release() (obsolete []*FileMetadata) { 662 if t.root != nil { 663 t.root.decRef(true /* recursive */, &obsolete) 664 t.root = nil 665 } 666 t.length = 0 667 return obsolete 668 } 669 670 // clone clones the btree, lazily. It does so in constant time. 671 func (t *btree) clone() btree { 672 c := *t 673 if c.root != nil { 674 // Incrementing the reference count on the root node is sufficient to 675 // ensure that no node in the cloned tree can be mutated by an actor 676 // holding a reference to the original tree and vice versa. This 677 // property is upheld because the root node in the receiver btree and 678 // the returned btree will both necessarily have a reference count of at 679 // least 2 when this method returns. All tree mutations recursively 680 // acquire mutable node references (see mut) as they traverse down the 681 // tree. The act of acquiring a mutable node reference performs a clone 682 // if a node's reference count is greater than one. Cloning a node (see 683 // clone) increases the reference count on each of its children, 684 // ensuring that they have a reference count of at least 2. This, in 685 // turn, ensures that any of the child nodes that are modified will also 686 // be copied-on-write, recursively ensuring the immutability property 687 // over the entire tree. 688 c.root.incRef() 689 } 690 return c 691 } 692 693 // delete removes the provided file from the tree. 694 // It returns true if the file now has a zero reference count. 695 func (t *btree) delete(item *FileMetadata) (obsolete bool) { 696 if t.root == nil || t.root.count == 0 { 697 return false 698 } 699 if out := mut(&t.root).remove(t.cmp, item); out != nil { 700 t.length-- 701 obsolete = atomic.AddInt32(&out.refs, -1) == 0 702 } 703 if t.root.count == 0 { 704 old := t.root 705 if t.root.leaf { 706 t.root = nil 707 } else { 708 t.root = t.root.children[0] 709 } 710 old.decRef(false /* recursive */, nil) 711 } 712 return obsolete 713 } 714 715 // insert adds the given item to the tree. If a item in the tree already 716 // equals the given one, insert panics. 717 func (t *btree) insert(item *FileMetadata) error { 718 if t.root == nil { 719 t.root = newLeafNode() 720 } else if t.root.count >= maxItems { 721 splitLa, splitNode := mut(&t.root).split(maxItems / 2) 722 newRoot := newNode() 723 newRoot.count = 1 724 newRoot.items[0] = splitLa 725 newRoot.children[0] = t.root 726 newRoot.children[1] = splitNode 727 t.root = newRoot 728 } 729 atomic.AddInt32(&item.refs, 1) 730 err := mut(&t.root).insert(t.cmp, item) 731 t.length++ 732 return err 733 } 734 735 // iter returns a new iterator object. It is not safe to continue using an 736 // iterator after modifications are made to the tree. If modifications are made, 737 // create a new iterator. 738 func (t *btree) iter() iterator { 739 return iterator{r: t.root, pos: -1, cmp: t.cmp} 740 } 741 742 // height returns the height of the tree. 743 func (t *btree) height() int { 744 if t.root == nil { 745 return 0 746 } 747 h := 1 748 n := t.root 749 for !n.leaf { 750 n = n.children[0] 751 h++ 752 } 753 return h 754 } 755 756 // String returns a string description of the tree. The format is 757 // similar to the https://en.wikipedia.org/wiki/Newick_format. 758 func (t *btree) String() string { 759 if t.length == 0 { 760 return ";" 761 } 762 var b strings.Builder 763 t.root.writeString(&b) 764 return b.String() 765 } 766 767 func (n *node) writeString(b *strings.Builder) { 768 if n.leaf { 769 for i := int16(0); i < n.count; i++ { 770 if i != 0 { 771 b.WriteString(",") 772 } 773 b.WriteString(n.items[i].String()) 774 } 775 return 776 } 777 for i := int16(0); i <= n.count; i++ { 778 b.WriteString("(") 779 n.children[i].writeString(b) 780 b.WriteString(")") 781 if i < n.count { 782 b.WriteString(n.items[i].String()) 783 } 784 } 785 } 786 787 // iterStack represents a stack of (node, pos) tuples, which captures 788 // iteration state as an iterator descends a btree. 789 type iterStack struct { 790 // a contains aLen stack frames when an iterator stack is short enough. 791 // If the iterator stack overflows the capacity of iterStackArr, the stack 792 // is moved to s and aLen is set to -1. 793 a iterStackArr 794 aLen int16 // -1 when using s 795 s []iterFrame 796 } 797 798 // Used to avoid allocations for stacks below a certain size. 799 type iterStackArr [3]iterFrame 800 801 type iterFrame struct { 802 n *node 803 pos int16 804 } 805 806 func (is *iterStack) push(f iterFrame) { 807 if is.aLen == -1 { 808 is.s = append(is.s, f) 809 } else if int(is.aLen) == len(is.a) { 810 is.s = make([]iterFrame, int(is.aLen)+1, 2*int(is.aLen)) 811 copy(is.s, is.a[:]) 812 is.s[int(is.aLen)] = f 813 is.aLen = -1 814 } else { 815 is.a[is.aLen] = f 816 is.aLen++ 817 } 818 } 819 820 func (is *iterStack) pop() iterFrame { 821 if is.aLen == -1 { 822 f := is.s[len(is.s)-1] 823 is.s = is.s[:len(is.s)-1] 824 return f 825 } 826 is.aLen-- 827 return is.a[is.aLen] 828 } 829 830 func (is *iterStack) len() int { 831 if is.aLen == -1 { 832 return len(is.s) 833 } 834 return int(is.aLen) 835 } 836 837 func (is *iterStack) clone() iterStack { 838 // If the iterator is using the embedded iterStackArr, we only need to 839 // copy the struct itself. 840 if is.s == nil { 841 return *is 842 } 843 clone := *is 844 clone.s = make([]iterFrame, len(is.s)) 845 copy(clone.s, is.s) 846 return clone 847 } 848 849 func (is *iterStack) nth(n int) (f iterFrame, ok bool) { 850 if is.aLen == -1 { 851 if n >= len(is.s) { 852 return f, false 853 } 854 return is.s[n], true 855 } 856 if int16(n) >= is.aLen { 857 return f, false 858 } 859 return is.a[n], true 860 } 861 862 func (is *iterStack) reset() { 863 if is.aLen == -1 { 864 is.s = is.s[:0] 865 } else { 866 is.aLen = 0 867 } 868 } 869 870 // iterator is responsible for search and traversal within a btree. 871 type iterator struct { 872 // the root node of the B-Tree. 873 r *node 874 // n and pos make up the current position of the iterator. 875 // If valid, n.items[pos] is the current value of the iterator. 876 n *node 877 pos int16 878 // cmp dictates the ordering of the FileMetadata. 879 cmp func(*FileMetadata, *FileMetadata) int 880 // a stack of n's ancestors within the B-Tree, alongside the position 881 // taken to arrive at n. If non-empty, the bottommost frame of the stack 882 // will always contain the B-Tree root. 883 s iterStack 884 } 885 886 func (i *iterator) clone() iterator { 887 c := *i 888 c.s = i.s.clone() 889 return c 890 } 891 892 func (i *iterator) reset() { 893 i.n = i.r 894 i.pos = -1 895 i.s.reset() 896 } 897 898 func (i iterator) String() string { 899 var buf bytes.Buffer 900 for n := 0; ; n++ { 901 f, ok := i.s.nth(n) 902 if !ok { 903 break 904 } 905 fmt.Fprintf(&buf, "%p: %02d/%02d\n", f.n, f.pos, f.n.count) 906 } 907 if i.n == nil { 908 fmt.Fprintf(&buf, "<nil>: %02d", i.pos) 909 } else { 910 fmt.Fprintf(&buf, "%p: %02d/%02d", i.n, i.pos, i.n.count) 911 } 912 return buf.String() 913 } 914 915 func cmpIter(a, b iterator) int { 916 if a.r != b.r { 917 panic("compared iterators from different btrees") 918 } 919 920 // Each iterator has a stack of frames marking the path from the root node 921 // to the current iterator position. We walk both paths formed by the 922 // iterators' stacks simultaneously, descending from the shared root node, 923 // always comparing nodes at the same level in the tree. 924 // 925 // If the iterators' paths ever diverge and point to different nodes, the 926 // iterators are not equal and we use the node positions to evaluate the 927 // comparison. 928 // 929 // If an iterator's stack ends, we stop descending and use its current 930 // node and position for the final comparison. One iterator's stack may 931 // end before another's if one iterator is positioned deeper in the tree. 932 // 933 // a b 934 // +------------------------+ +--------------------------+ - 935 // | Root pos:5 | = | Root pos:5 | | 936 // +------------------------+ +--------------------------+ | stack 937 // | Root/5 pos:3 | = | Root/5 pos:3 | | frames 938 // +------------------------+ +--------------------------+ | 939 // | Root/5/3 pos:9 | > | Root/5/3 pos:1 | | 940 // +========================+ +==========================+ - 941 // | | | | 942 // | a.n: Root/5/3/9 a.pos:2| | b.n: Root/5/3/1, b.pos:5 | 943 // +------------------------+ +--------------------------+ 944 945 // Initialize with the iterator's current node and position. These are 946 // conceptually the most-recent/current frame of the iterator stack. 947 an, apos := a.n, a.pos 948 bn, bpos := b.n, b.pos 949 950 // aok, bok are set while traversing the iterator's path down the B-Tree. 951 // They're declared in the outer scope because they help distinguish the 952 // sentinel case when both iterators' first frame points to the last child 953 // of the root. If an iterator has no other frames in its stack, it's the 954 // end sentinel state which sorts after everything else. 955 var aok, bok bool 956 for i := 0; ; i++ { 957 var af, bf iterFrame 958 af, aok = a.s.nth(i) 959 bf, bok = b.s.nth(i) 960 if !aok || !bok { 961 if aok { 962 // Iterator a, unlike iterator b, still has a frame. Set an, 963 // apos so we compare using the frame from the stack. 964 an, apos = af.n, af.pos 965 } 966 if bok { 967 // Iterator b, unlike iterator a, still has a frame. Set bn, 968 // bpos so we compare using the frame from the stack. 969 bn, bpos = bf.n, bf.pos 970 } 971 break 972 } 973 974 // aok && bok 975 if af.n != bf.n { 976 panic("nonmatching nodes during btree iterator comparison") 977 } 978 switch { 979 case af.pos < bf.pos: 980 return -1 981 case af.pos > bf.pos: 982 return +1 983 default: 984 // Continue up both iterators' stacks (equivalently, down the 985 // B-Tree away from the root). 986 } 987 } 988 989 if aok && bok { 990 panic("expected one or more stacks to have been exhausted") 991 } 992 if an != bn { 993 panic("nonmatching nodes during btree iterator comparison") 994 } 995 switch { 996 case apos < bpos: 997 return -1 998 case apos > bpos: 999 return +1 1000 default: 1001 switch { 1002 case aok: 1003 // a is positioned at a leaf child at this position and b is at an 1004 // end sentinel state. 1005 return -1 1006 case bok: 1007 // b is positioned at a leaf child at this position and a is at an 1008 // end sentinel state. 1009 return +1 1010 default: 1011 return 0 1012 } 1013 } 1014 } 1015 1016 func (i *iterator) descend(n *node, pos int16) { 1017 i.s.push(iterFrame{n: n, pos: pos}) 1018 i.n = n.children[pos] 1019 i.pos = 0 1020 } 1021 1022 // ascend ascends up to the current node's parent and resets the position 1023 // to the one previously set for this parent node. 1024 func (i *iterator) ascend() { 1025 f := i.s.pop() 1026 i.n = f.n 1027 i.pos = f.pos 1028 } 1029 1030 // seek repositions the iterator over the first file for which fn returns 1031 // true, mirroring the semantics of the standard library's sort.Search 1032 // function. Like sort.Search, seek requires the iterator's B-Tree to be 1033 // ordered such that fn returns false for some (possibly empty) prefix of the 1034 // tree's files, and then true for the (possibly empty) remainder. 1035 func (i *iterator) seek(fn func(*FileMetadata) bool) { 1036 i.reset() 1037 if i.n == nil { 1038 return 1039 } 1040 1041 for { 1042 // Logic copied from sort.Search. 1043 j, k := 0, int(i.n.count) 1044 for j < k { 1045 h := int(uint(j+k) >> 1) // avoid overflow when computing h 1046 1047 // j ≤ h < k 1048 if !fn(i.n.items[h]) { 1049 j = h + 1 // preserves f(j-1) == false 1050 } else { 1051 k = h // preserves f(k) == true 1052 } 1053 } 1054 1055 i.pos = int16(j) 1056 if i.n.leaf { 1057 if i.pos == i.n.count { 1058 i.next() 1059 } 1060 return 1061 } 1062 i.descend(i.n, i.pos) 1063 } 1064 } 1065 1066 // first seeks to the first item in the btree. 1067 func (i *iterator) first() { 1068 i.reset() 1069 if i.n == nil { 1070 return 1071 } 1072 for !i.n.leaf { 1073 i.descend(i.n, 0) 1074 } 1075 i.pos = 0 1076 } 1077 1078 // last seeks to the last item in the btree. 1079 func (i *iterator) last() { 1080 i.reset() 1081 if i.n == nil { 1082 return 1083 } 1084 for !i.n.leaf { 1085 i.descend(i.n, i.n.count) 1086 } 1087 i.pos = i.n.count - 1 1088 } 1089 1090 // next positions the iterator to the item immediately following 1091 // its current position. 1092 func (i *iterator) next() { 1093 if i.n == nil { 1094 return 1095 } 1096 1097 if i.n.leaf { 1098 if i.pos < i.n.count { 1099 i.pos++ 1100 } 1101 if i.pos < i.n.count { 1102 return 1103 } 1104 for i.s.len() > 0 && i.pos >= i.n.count { 1105 i.ascend() 1106 } 1107 return 1108 } 1109 1110 i.descend(i.n, i.pos+1) 1111 for !i.n.leaf { 1112 i.descend(i.n, 0) 1113 } 1114 i.pos = 0 1115 } 1116 1117 // prev positions the iterator to the item immediately preceding 1118 // its current position. 1119 func (i *iterator) prev() { 1120 if i.n == nil { 1121 return 1122 } 1123 1124 if i.n.leaf { 1125 i.pos-- 1126 if i.pos >= 0 { 1127 return 1128 } 1129 for i.s.len() > 0 && i.pos < 0 { 1130 i.ascend() 1131 i.pos-- 1132 } 1133 return 1134 } 1135 1136 i.descend(i.n, i.pos) 1137 for !i.n.leaf { 1138 i.descend(i.n, i.n.count) 1139 } 1140 i.pos = i.n.count - 1 1141 } 1142 1143 // valid returns whether the iterator is positioned at a valid position. 1144 func (i *iterator) valid() bool { 1145 return i.r != nil && i.pos >= 0 && i.pos < i.n.count 1146 } 1147 1148 // cur returns the item at the iterator's current position. It is illegal 1149 // to call cur if the iterator is not valid. 1150 func (i *iterator) cur() *FileMetadata { 1151 return i.n.items[i.pos] 1152 }