github.com/scottcagno/storage@v1.8.0/pkg/lsmt/mtbl/rbtree.go (about) 1 package mtbl 2 3 import ( 4 "bytes" 5 "github.com/scottcagno/storage/pkg/lsmt/binary" 6 "runtime" 7 "strings" 8 ) 9 10 var empty *binary.Entry = nil 11 12 func compare(this, that *binary.Entry) int { 13 return bytes.Compare(this.Key, that.Key) 14 } 15 16 const ( 17 RED = 0 18 BLACK = 1 19 ) 20 21 type rbNode struct { 22 left *rbNode 23 right *rbNode 24 parent *rbNode 25 color uint 26 entry *binary.Entry 27 } 28 29 type RBTree = rbTree 30 31 // rbTree is a struct representing a rbTree 32 type rbTree struct { 33 NIL *rbNode 34 root *rbNode 35 count int 36 size int64 37 } 38 39 func NewRBTree() *rbTree { 40 return newRBTree() 41 } 42 43 // NewTree creates and returns a new rbTree 44 func newRBTree() *rbTree { 45 n := &rbNode{ 46 left: nil, 47 right: nil, 48 parent: nil, 49 color: BLACK, 50 entry: empty, 51 } 52 return &rbTree{ 53 NIL: n, 54 root: n, 55 count: 0, 56 size: 0, 57 } 58 } 59 60 func (t *rbTree) Count() int { 61 return t.count 62 } 63 64 // Has tests and returns a boolean value if the 65 // provided key exists in the tree 66 func (t *rbTree) Has(entry *binary.Entry) bool { 67 _, ok := t.getInternal(entry) 68 return ok 69 } 70 71 // HasKey tests and returns a boolean value if the 72 // provided key exists in the tree 73 func (t *rbTree) HasKey(k string) bool { 74 e, ok := t.getInternal(&binary.Entry{Key: []byte(k)}) 75 return ok && e != nil && e.Value != nil 76 } 77 78 // Add adds the provided key and value only if it does not 79 // already exist in the tree. It returns false if the key and 80 // value was not able to be added, and true if it was added 81 // successfully 82 func (t *rbTree) Add(entry *binary.Entry) bool { 83 _, ok := t.getInternal(entry) 84 if ok { 85 // key already exists, so we are not adding 86 return false 87 } 88 t.putInternal(entry) 89 return true 90 } 91 92 func (t *rbTree) Put(entry *binary.Entry) (*binary.Entry, bool) { 93 return t.putInternal(entry) 94 } 95 96 // UpsertAndCheckIfFull updates the provided entry if it already 97 // exists or inserts the supplied entry as a new entry if it 98 // does not exist. UpsertAndCheckIfFull returns the current size 99 // in bytes after performing the insert or update. It also returns 100 // a boolean reporting true if the tree has met or exceeded the 101 // provided threshold, and false if the current size is less than 102 // the provided threshold. 103 func (t *rbTree) UpsertAndCheckIfFull(entry *binary.Entry, threshold int64) (int64, bool) { 104 // TODO: possibly perform pre-check in future somehow?? 105 // 106 // insert the entry in to the mem-table 107 t.putInternal(entry) 108 if t.size >= threshold { 109 // size is greater or equal to supplied threshold 110 // return size along with a true value (need flush) 111 return t.size, true 112 } 113 // size has not met or exceeded supplied threshold 114 // simply return the current size, and a false value 115 return t.size, false 116 } 117 118 // UpsertBatchAndCheckIfFull ranges the batch of entries, and it 119 // updates the provided entry if it already exists or inserts the 120 // supplied entry as a new entry if it does not exist. When it's 121 // finished, UpsertBatchAndCheckIfFull returns the current size in 122 // bytes after performing the insert or update. It also returns a 123 // boolean value reporting true if the tree has met or exceeded the 124 // provided threshold, and false if the current size is less than 125 // the provided threshold. 126 func (t *rbTree) UpsertBatchAndCheckIfFull(batch *binary.Batch, threshold int64) (int64, bool) { 127 // TODO: possibly perform pre-check in future somehow?? 128 // 129 // range the batch entries 130 for _, e := range batch.Entries { 131 // insert the entry in to the mem-table 132 t.putInternal(e) 133 } 134 // TODO: possibly think about dealing with partial batches?? 135 if t.size >= threshold { 136 // size is greater or equal to supplied threshold 137 // return size along with a true value (need flush) 138 return t.size, true 139 } 140 // size has not met or exceeded supplied threshold 141 // simply return the current size, and a false value 142 return t.size, false 143 } 144 145 func (t *rbTree) PutBatch(batch *binary.Batch) { 146 for _, entry := range batch.Entries { 147 t.putInternal(entry) 148 } 149 } 150 151 func (t *rbTree) putInternal(entry *binary.Entry) (*binary.Entry, bool) { 152 if entry == nil { 153 return nil, false 154 } 155 // insert return the node along with 156 // a boolean value signaling true if 157 // the node was updated, and false if 158 // the node was newly added. 159 ret, ok := t.insert(&rbNode{ 160 left: t.NIL, 161 right: t.NIL, 162 parent: t.NIL, 163 color: RED, 164 entry: entry, 165 }) 166 return ret.entry, ok 167 } 168 169 func (t *rbTree) Get(entry *binary.Entry) (*binary.Entry, bool) { 170 return t.getInternal(entry) 171 } 172 173 // GetNearMin performs an approximate search for the specified key 174 // and returns the closest key that is less than (the predecessor) 175 // to the searched key as well as a boolean reporting true if an 176 // exact match was found for the key, and false if it is unknown 177 // or and exact match was not found 178 func (t *rbTree) GetNearMin(entry *binary.Entry) (*binary.Entry, bool) { 179 if entry == nil { 180 return nil, false 181 } 182 ret := t.searchApprox(&rbNode{ 183 left: t.NIL, 184 right: t.NIL, 185 parent: t.NIL, 186 color: RED, 187 entry: entry, 188 }) 189 prev := t.predecessor(ret).entry 190 if prev == nil { 191 prev, _ = t.Min() 192 } 193 return prev, compare(ret.entry, entry) == 0 194 } 195 196 // GetNearMax performs an approximate search for the specified key 197 // and returns the closest key that is greater than (the successor) 198 // to the searched key as well as a boolean reporting true if an 199 // exact match was found for the key, and false if it is unknown or 200 // and exact match was not found 201 func (t *rbTree) GetNearMax(entry *binary.Entry) (*binary.Entry, bool) { 202 if entry == nil { 203 return nil, false 204 } 205 ret := t.searchApprox(&rbNode{ 206 left: t.NIL, 207 right: t.NIL, 208 parent: t.NIL, 209 color: RED, 210 entry: entry, 211 }) 212 return t.successor(ret).entry, compare(ret.entry, entry) == 0 213 } 214 215 // GetApproxPrevNext performs an approximate search for the specified key 216 // and returns the searched key, the predecessor, and the successor and a 217 // boolean reporting true if an exact match was found for the key, and false 218 // if it is unknown or and exact match was not found 219 func (t *rbTree) GetApproxPrevNext(entry *binary.Entry) (*binary.Entry, *binary.Entry, *binary.Entry, bool) { 220 if entry == nil { 221 return nil, nil, nil, false 222 } 223 ret := t.searchApprox(&rbNode{ 224 left: t.NIL, 225 right: t.NIL, 226 parent: t.NIL, 227 color: RED, 228 entry: entry, 229 }) 230 return ret.entry, t.predecessor(ret).entry, t.successor(ret).entry, 231 compare(ret.entry, entry) == 0 232 } 233 234 func (t *rbTree) getInternal(entry *binary.Entry) (*binary.Entry, bool) { 235 if entry == nil { 236 return nil, false 237 } 238 ret := t.search(&rbNode{ 239 left: t.NIL, 240 right: t.NIL, 241 parent: t.NIL, 242 color: RED, 243 entry: entry, 244 }) 245 return ret.entry, ret.entry != nil 246 } 247 248 func (t *rbTree) Del(entry *binary.Entry) (*binary.Entry, bool) { 249 return t.delInternal(entry) 250 } 251 252 func (t *rbTree) delInternal(entry *binary.Entry) (*binary.Entry, bool) { 253 if entry == nil { 254 return nil, false 255 } 256 cnt := t.count 257 ret := t.delete(&rbNode{ 258 left: t.NIL, 259 right: t.NIL, 260 parent: t.NIL, 261 color: RED, 262 entry: entry, 263 }) 264 return ret.entry, cnt == t.count+1 265 } 266 267 func (t *rbTree) Len() int { 268 return t.count 269 } 270 271 // Size returns the size in bytes 272 func (t *rbTree) Size() int64 { 273 return t.size 274 } 275 276 func (t *rbTree) Min() (*binary.Entry, bool) { 277 x := t.min(t.root) 278 if x == t.NIL { 279 return nil, false 280 } 281 return x.entry, true 282 } 283 284 func (t *rbTree) Max() (*binary.Entry, bool) { 285 x := t.max(t.root) 286 if x == t.NIL { 287 return nil, false 288 } 289 return x.entry, true 290 } 291 292 // helper function for clone 293 func (t *rbTree) cloneEntries(t2 *rbTree) { 294 t.ascend(t.root, t.min(t.root).entry, func(e *binary.Entry) bool { 295 t2.putInternal(e) 296 return true 297 }) 298 } 299 300 type Iterator func(entry *binary.Entry) bool 301 302 func (t *rbTree) Scan(iter Iterator) { 303 t.ascend(t.root, t.min(t.root).entry, iter) 304 } 305 306 func (t *rbTree) ScanBack(iter Iterator) { 307 t.descend(t.root, t.max(t.root).entry, iter) 308 } 309 310 func (t *rbTree) ScanRange(start, end *binary.Entry, iter Iterator) { 311 t.ascendRange(t.root, start, end, iter) 312 } 313 314 func (t *rbTree) String() string { 315 var sb strings.Builder 316 t.ascend(t.root, t.min(t.root).entry, func(entry *binary.Entry) bool { 317 sb.WriteString(entry.String()) 318 return true 319 }) 320 return sb.String() 321 } 322 323 func (t *rbTree) Close() { 324 t.NIL = nil 325 t.root = nil 326 t.count = 0 327 return 328 } 329 330 func (t *rbTree) Reset() { 331 t.NIL = nil 332 t.root = nil 333 t.count = 0 334 runtime.GC() 335 n := &rbNode{ 336 left: nil, 337 right: nil, 338 parent: nil, 339 color: BLACK, 340 entry: empty, 341 } 342 t.NIL = n 343 t.root = n 344 t.count = 0 345 t.size = 0 346 } 347 348 func (t *rbTree) insert(z *rbNode) (*rbNode, bool) { 349 x := t.root 350 y := t.NIL 351 for x != t.NIL { 352 y = x 353 if compare(z.entry, x.entry) == -1 { 354 x = x.left 355 } else if compare(x.entry, z.entry) == -1 { 356 x = x.right 357 } else { 358 t.size -= int64(x.entry.Size()) 359 t.size += int64(z.entry.Size()) 360 // originally we were just returning x 361 // without updating the RBEntry, but if we 362 // want it to have similar behavior to 363 // a hashmap then we need to update any 364 // entries that already exist in the tree 365 x.entry = z.entry 366 return x, true // true means an existing 367 // value was found and updated. It should 368 // be noted that we don't need to re-balance 369 // the tree because they keys are not changing 370 // and the tree is balance is maintained by 371 // the keys and not their values. 372 } 373 } 374 z.parent = y 375 if y == t.NIL { 376 t.root = z 377 } else if compare(z.entry, y.entry) == -1 { 378 y.left = z 379 } else { 380 y.right = z 381 } 382 t.count++ 383 t.size += int64(z.entry.Size()) 384 t.insertFixup(z) 385 return z, false 386 } 387 388 func (t *rbTree) leftRotate(x *rbNode) { 389 if x.right == t.NIL { 390 return 391 } 392 y := x.right 393 x.right = y.left 394 if y.left != t.NIL { 395 y.left.parent = x 396 } 397 y.parent = x.parent 398 if x.parent == t.NIL { 399 t.root = y 400 } else if x == x.parent.left { 401 x.parent.left = y 402 } else { 403 x.parent.right = y 404 } 405 y.left = x 406 x.parent = y 407 } 408 409 func (t *rbTree) rightRotate(x *rbNode) { 410 if x.left == t.NIL { 411 return 412 } 413 y := x.left 414 x.left = y.right 415 if y.right != t.NIL { 416 y.right.parent = x 417 } 418 y.parent = x.parent 419 420 if x.parent == t.NIL { 421 t.root = y 422 } else if x == x.parent.left { 423 x.parent.left = y 424 } else { 425 x.parent.right = y 426 } 427 428 y.right = x 429 x.parent = y 430 } 431 432 func (t *rbTree) insertFixup(z *rbNode) { 433 for z.parent.color == RED { 434 if z.parent == z.parent.parent.left { 435 y := z.parent.parent.right 436 if y.color == RED { 437 z.parent.color = BLACK 438 y.color = BLACK 439 z.parent.parent.color = RED 440 z = z.parent.parent 441 } else { 442 if z == z.parent.right { 443 z = z.parent 444 t.leftRotate(z) 445 } 446 z.parent.color = BLACK 447 z.parent.parent.color = RED 448 t.rightRotate(z.parent.parent) 449 } 450 } else { 451 y := z.parent.parent.left 452 if y.color == RED { 453 z.parent.color = BLACK 454 y.color = BLACK 455 z.parent.parent.color = RED 456 z = z.parent.parent 457 } else { 458 if z == z.parent.left { 459 z = z.parent 460 t.rightRotate(z) 461 } 462 z.parent.color = BLACK 463 z.parent.parent.color = RED 464 t.leftRotate(z.parent.parent) 465 } 466 } 467 } 468 t.root.color = BLACK 469 } 470 471 // trying out a slightly different search method 472 // that (hopefully) will not return nil values and 473 // instead will return approximate node matches 474 func (t *rbTree) searchApprox(x *rbNode) *rbNode { 475 p := t.root 476 for p != t.NIL { 477 if compare(p.entry, x.entry) == -1 { 478 if p.right == t.NIL { 479 break 480 } 481 p = p.right 482 } else if compare(x.entry, p.entry) == -1 { 483 if p.left == t.NIL { 484 break 485 } 486 p = p.left 487 } else { 488 break 489 } 490 } 491 return p 492 } 493 494 func (t *rbTree) search(x *rbNode) *rbNode { 495 p := t.root 496 for p != t.NIL { 497 if compare(p.entry, x.entry) == -1 { 498 p = p.right 499 } else if compare(x.entry, p.entry) == -1 { 500 p = p.left 501 } else { 502 break 503 } 504 } 505 return p 506 } 507 508 // min traverses from root to left recursively until left is NIL 509 func (t *rbTree) min(x *rbNode) *rbNode { 510 if x == t.NIL { 511 return t.NIL 512 } 513 for x.left != t.NIL { 514 x = x.left 515 } 516 return x 517 } 518 519 // max traverses from root to right recursively until right is NIL 520 func (t *rbTree) max(x *rbNode) *rbNode { 521 if x == t.NIL { 522 return t.NIL 523 } 524 for x.right != t.NIL { 525 x = x.right 526 } 527 return x 528 } 529 530 func (t *rbTree) predecessor(x *rbNode) *rbNode { 531 if x == t.NIL { 532 return t.NIL 533 } 534 if x.left != t.NIL { 535 return t.max(x.left) 536 } 537 y := x.parent 538 for y != t.NIL && x == y.left { 539 x = y 540 y = y.parent 541 } 542 return y 543 } 544 545 func (t *rbTree) successor(x *rbNode) *rbNode { 546 if x == t.NIL { 547 return t.NIL 548 } 549 if x.right != t.NIL { 550 return t.min(x.right) 551 } 552 y := x.parent 553 for y != t.NIL && x == y.right { 554 x = y 555 y = y.parent 556 } 557 return y 558 } 559 560 func (t *rbTree) delete(key *rbNode) *rbNode { 561 z := t.search(key) 562 if z == t.NIL { 563 return t.NIL 564 } 565 ret := &rbNode{t.NIL, t.NIL, t.NIL, z.color, z.entry} 566 var y *rbNode 567 var x *rbNode 568 if z.left == t.NIL || z.right == t.NIL { 569 y = z 570 } else { 571 y = t.successor(z) 572 } 573 if y.left != t.NIL { 574 x = y.left 575 } else { 576 x = y.right 577 } 578 x.parent = y.parent 579 580 if y.parent == t.NIL { 581 t.root = x 582 } else if y == y.parent.left { 583 y.parent.left = x 584 } else { 585 y.parent.right = x 586 } 587 if y != z { 588 z.entry = y.entry 589 } 590 if y.color == BLACK { 591 t.deleteFixup(x) 592 } 593 t.size -= int64(ret.entry.Size()) 594 t.count-- 595 return ret 596 } 597 598 func (t *rbTree) deleteFixup(x *rbNode) { 599 for x != t.root && x.color == BLACK { 600 if x == x.parent.left { 601 w := x.parent.right 602 if w.color == RED { 603 w.color = BLACK 604 x.parent.color = RED 605 t.leftRotate(x.parent) 606 w = x.parent.right 607 } 608 if w.left.color == BLACK && w.right.color == BLACK { 609 w.color = RED 610 x = x.parent 611 } else { 612 if w.right.color == BLACK { 613 w.left.color = BLACK 614 w.color = RED 615 t.rightRotate(w) 616 w = x.parent.right 617 } 618 w.color = x.parent.color 619 x.parent.color = BLACK 620 w.right.color = BLACK 621 t.leftRotate(x.parent) 622 // this is to exit while loop 623 x = t.root 624 } 625 } else { 626 w := x.parent.left 627 if w.color == RED { 628 w.color = BLACK 629 x.parent.color = RED 630 t.rightRotate(x.parent) 631 w = x.parent.left 632 } 633 if w.left.color == BLACK && w.right.color == BLACK { 634 w.color = RED 635 x = x.parent 636 } else { 637 if w.left.color == BLACK { 638 w.right.color = BLACK 639 w.color = RED 640 t.leftRotate(w) 641 w = x.parent.left 642 } 643 w.color = x.parent.color 644 x.parent.color = BLACK 645 w.left.color = BLACK 646 t.rightRotate(x.parent) 647 x = t.root 648 } 649 } 650 } 651 x.color = BLACK 652 } 653 654 func (t *rbTree) ascend(x *rbNode, entry *binary.Entry, iter Iterator) bool { 655 if x == t.NIL { 656 return true 657 } 658 if !(compare(x.entry, entry) == -1) { 659 if !t.ascend(x.left, entry, iter) { 660 return false 661 } 662 if !iter(x.entry) { 663 return false 664 } 665 } 666 return t.ascend(x.right, entry, iter) 667 } 668 669 func (t *rbTree) descend(x *rbNode, pivot *binary.Entry, iter Iterator) bool { 670 if x == t.NIL { 671 return true 672 } 673 if !(compare(pivot, x.entry) == -1) { 674 if !t.descend(x.right, pivot, iter) { 675 return false 676 } 677 if !iter(x.entry) { 678 return false 679 } 680 } 681 return t.descend(x.left, pivot, iter) 682 } 683 684 func (t *rbTree) ascendRange(x *rbNode, inf, sup *binary.Entry, iter Iterator) bool { 685 if x == t.NIL { 686 return true 687 } 688 if !(compare(x.entry, sup) == -1) { 689 return t.ascendRange(x.left, inf, sup, iter) 690 } 691 if compare(x.entry, inf) == -1 { 692 return t.ascendRange(x.right, inf, sup, iter) 693 } 694 if !t.ascendRange(x.left, inf, sup, iter) { 695 return false 696 } 697 if !iter(x.entry) { 698 return false 699 } 700 return t.ascendRange(x.right, inf, sup, iter) 701 }