// Origin: github.com/songzhibin97/go-baseutils@v0.0.2-0.20240302024150-487d8ce9c082/structure/trees/btree/btree.go

// Package btree implements a B tree.
//
// According to Knuth's definition, a B-tree of order m is a tree which satisfies the following properties:
// - Every node has at most m children.
// - Every non-leaf node (except root) has at least ceil(m/2) children.
// - The root has at least two children if it is not a leaf node.
// - A non-leaf node with k children contains k-1 keys.
// - All leaves appear in the same level
//
// Structure is not thread safe.
//
// References: https://en.wikipedia.org/wiki/B-tree
package btree

import (
	"bytes"
	"encoding/json"
	"fmt"
	"github.com/songzhibin97/go-baseutils/base/bcomparator"
	"github.com/songzhibin97/go-baseutils/structure/trees"

	"strings"
)

// Compile-time assertion that *Tree satisfies the trees.Tree interface.
var _ trees.Tree[any] = (*Tree[any, any])(nil)

// Tree holds elements of the B-tree.
// K is the key type ordered by Comparator; V is the value type stored with each key.
type Tree[K, V any] struct {
	Root       *Node[K, V]               // Root node; nil while the tree holds no entries
	Comparator bcomparator.Comparator[K] // Key comparator; called as Comparator(a, b) and compared against 0
	size       int                       // Total number of keys in the tree
	m          int                       // order (maximum number of children)
	zeroK      K                         // returned by LeftKey/RightKey when the tree is empty; never assigned in this file
	zeroV      V                         // returned by Get/LeftValue/RightValue when nothing is found; never assigned in this file
}

// Node is a single element within the tree.
// A node with no Children is treated as a leaf by the tree's helpers.
type Node[K, V any] struct {
	Parent   *Node[K, V]    // Parent node; nil for a node installed as the root
	Entries  []*Entry[K, V] // Contained keys in node
	Children []*Node[K, V]  // Children nodes
}

// Entry represents the key-value pair contained within nodes.
type Entry[K, V any] struct {
	Key   K
	Value V
}

// NewWith instantiates a B-tree with the order (maximum number of children) and a custom key comparator.
// It panics if order is less than 3.
52 func NewWith[K, V any](order int, comparator bcomparator.Comparator[K]) *Tree[K, V] { 53 if order < 3 { 54 panic("Invalid order, should be at least 3") 55 } 56 return &Tree[K, V]{m: order, Comparator: comparator} 57 } 58 59 // NewWithIntComparator instantiates a B-tree with the order (maximum number of children) and the IntComparator, i.e. keys are of type int. 60 func NewWithIntComparator[V any](order int) *Tree[int, V] { 61 return NewWith[int, V](order, bcomparator.IntComparator()) 62 } 63 64 // NewWithStringComparator instantiates a B-tree with the order (maximum number of children) and the StringComparator, i.e. keys are of type string. 65 func NewWithStringComparator[V any](order int) *Tree[string, V] { 66 return NewWith[string, V](order, bcomparator.StringComparator()) 67 } 68 69 // Put inserts key-value pair node into the tree. 70 // If key already exists, then its value is updated with the new value. 71 // Key should adhere to the comparator's type assertion, otherwise method panics. 72 func (tree *Tree[K, V]) Put(key K, value V) { 73 entry := &Entry[K, V]{Key: key, Value: value} 74 75 if tree.Root == nil { 76 tree.Root = &Node[K, V]{Entries: []*Entry[K, V]{entry}, Children: []*Node[K, V]{}} 77 tree.size++ 78 return 79 } 80 81 if tree.insert(tree.Root, entry) { 82 tree.size++ 83 } 84 } 85 86 // Get searches the node in the tree by key and returns its value or nil if key is not found in tree. 87 // Second return parameter is true if key was found, otherwise false. 88 // Key should adhere to the comparator's type assertion, otherwise method panics. 89 func (tree *Tree[K, V]) Get(key K) (value V, found bool) { 90 node, index, found := tree.searchRecursively(tree.Root, key) 91 if found { 92 return node.Entries[index].Value, true 93 } 94 return tree.zeroV, false 95 } 96 97 // GetNode searches the node in the tree by key and returns its node or nil if key is not found in tree. 98 // Key should adhere to the comparator's type assertion, otherwise method panics. 
99 func (tree *Tree[K, V]) GetNode(key K) *Node[K, V] { 100 node, _, _ := tree.searchRecursively(tree.Root, key) 101 return node 102 } 103 104 // Remove remove the node from the tree by key. 105 // Key should adhere to the comparator's type assertion, otherwise method panics. 106 func (tree *Tree[K, V]) Remove(key K) { 107 node, index, found := tree.searchRecursively(tree.Root, key) 108 if found { 109 tree.delete(node, index) 110 tree.size-- 111 } 112 } 113 114 // Empty returns true if tree does not contain any nodes 115 func (tree *Tree[K, V]) Empty() bool { 116 return tree.size == 0 117 } 118 119 // Size returns number of nodes in the tree. 120 func (tree *Tree[K, V]) Size() int { 121 return tree.size 122 } 123 124 // Size returns the number of elements stored in the subtree. 125 // Computed dynamically on each call, i.e. the subtree is traversed to count the number of the nodes. 126 func (node *Node[K, V]) Size() int { 127 if node == nil { 128 return 0 129 } 130 size := 1 131 for _, child := range node.Children { 132 size += child.Size() 133 } 134 return size 135 } 136 137 // Keys returns all keys in-order 138 func (tree *Tree[K, V]) Keys() []K { 139 keys := make([]K, tree.size) 140 it := tree.Iterator() 141 for i := 0; it.Next(); i++ { 142 keys[i] = it.Key() 143 } 144 return keys 145 } 146 147 // Values returns all values in-order based on the key. 148 func (tree *Tree[K, V]) Values() []V { 149 values := make([]V, tree.size) 150 it := tree.Iterator() 151 for i := 0; it.Next(); i++ { 152 values[i] = it.Value() 153 } 154 return values 155 } 156 157 // Clear removes all nodes from the tree. 158 func (tree *Tree[K, V]) Clear() { 159 tree.Root = nil 160 tree.size = 0 161 } 162 163 // Height returns the height of the tree. 164 func (tree *Tree[K, V]) Height() int { 165 return tree.Root.height() 166 } 167 168 // Left returns the left-most (min) node or nil if tree is empty. 
169 func (tree *Tree[K, V]) Left() *Node[K, V] { 170 return tree.left(tree.Root) 171 } 172 173 // LeftKey returns the left-most (min) key or nil if tree is empty. 174 func (tree *Tree[K, V]) LeftKey() K { 175 if left := tree.Left(); left != nil { 176 return left.Entries[0].Key 177 } 178 return tree.zeroK 179 } 180 181 // LeftValue returns the left-most value or nil if tree is empty. 182 func (tree *Tree[K, V]) LeftValue() V { 183 if left := tree.Left(); left != nil { 184 return left.Entries[0].Value 185 } 186 return tree.zeroV 187 } 188 189 // Right returns the right-most (max) node or nil if tree is empty. 190 func (tree *Tree[K, V]) Right() *Node[K, V] { 191 return tree.right(tree.Root) 192 } 193 194 // RightKey returns the right-most (max) key or nil if tree is empty. 195 func (tree *Tree[K, V]) RightKey() K { 196 if right := tree.Right(); right != nil { 197 return right.Entries[len(right.Entries)-1].Key 198 } 199 return tree.zeroK 200 } 201 202 // RightValue returns the right-most value or nil if tree is empty. 
func (tree *Tree[K, V]) RightValue() V {
	if right := tree.Right(); right != nil {
		// The last entry of the right-most node carries the value returned here.
		return right.Entries[len(right.Entries)-1].Value
	}
	return tree.zeroV
}

// String returns a string representation of container (for debugging purposes).
// The output starts with the line "BTree" followed by one line per key, as produced by output.
func (tree *Tree[K, V]) String() string {
	var buffer bytes.Buffer
	buffer.WriteString("BTree\n")
	if !tree.Empty() {
		tree.output(&buffer, tree.Root, 0, true)
	}
	return buffer.String()
}

// String formats only the entry's key (with %v); the value is not included.
func (entry *Entry[K, V]) String() string {
	return fmt.Sprintf("%v", entry.Key)
}

// output writes the subtree rooted at node into buffer, one key per line.
// For each position e it first recurses into child e (one level deeper) and then writes entry e,
// so keys are emitted in in-order sequence. Each key is prefixed with the indent unit repeated level times.
// isTail is not read by this function.
func (tree *Tree[K, V]) output(buffer *bytes.Buffer, node *Node[K, V], level int, isTail bool) {
	// One extra iteration so the child to the right of the last entry is visited too.
	for e := 0; e < len(node.Entries)+1; e++ {
		if e < len(node.Children) {
			tree.output(buffer, node.Children[e], level+1, true)
		}
		if e < len(node.Entries) {
			// NOTE(review): the indent unit is a single space in this copy of the file;
			// whitespace may have been collapsed during extraction — confirm against upstream.
			buffer.WriteString(strings.Repeat(" ", level))
			buffer.WriteString(fmt.Sprintf("%v", node.Entries[e].Key) + "\n")
		}
	}
}

// height counts the levels from node down to a leaf by following the first child at each level.
// A nil receiver yields 0; a leaf yields 1.
func (node *Node[K, V]) height() int {
	height := 0
	for ; node != nil; node = node.Children[0] {
		height++
		// Stop before the post statement indexes Children[0] on a leaf.
		if len(node.Children) == 0 {
			break
		}
	}
	return height
}

// isLeaf reports whether node has no children.
func (tree *Tree[K, V]) isLeaf(node *Node[K, V]) bool {
	return len(node.Children) == 0
}

// isFull reports whether node holds exactly maxEntries entries.
func (tree *Tree[K, V]) isFull(node *Node[K, V]) bool {
	return len(node.Entries) == tree.maxEntries()
}

// shouldSplit reports whether node holds more than maxEntries entries.
func (tree *Tree[K, V]) shouldSplit(node *Node[K, V]) bool {
	return len(node.Entries) > tree.maxEntries()
}

// maxChildren returns the tree's order m.
func (tree *Tree[K, V]) maxChildren() int {
	return tree.m
}

// minChildren returns ceil(m/2), computed with integer arithmetic.
func (tree *Tree[K, V]) minChildren() int {
	return (tree.m + 1) / 2 // ceil(m/2)
}

// maxEntries returns maxChildren()-1.
func (tree *Tree[K, V]) maxEntries() int {
	return tree.maxChildren() - 1
}

// minEntries returns minChildren()-1.
func (tree *Tree[K, V]) minEntries() int {
	return tree.minChildren() - 1
}

// middle returns the index of the entry that is moved up into the parent when a node is split.
func (tree *Tree[K, V]) middle() int {
	return (tree.m - 1) / 2 // "-1" to favor right nodes to have more keys when splitting
}

// search searches only within the single node among its entries (binary search using tree.Comparator).
// When the key is present it returns its entry index and true. Otherwise it returns the index at which
// the key would be inserted — also used by callers as the child index to descend into — and false.
func (tree *Tree[K, V]) search(node *Node[K, V], key K) (index int, found bool) {
	low, high := 0, len(node.Entries)-1
	var mid int
	for low <= high {
		mid = (high + low) / 2
		compare := tree.Comparator(key, node.Entries[mid].Key)
		switch {
		case compare > 0:
			low = mid + 1
		case compare < 0:
			high = mid - 1
		case compare == 0:
			return mid, true
		}
	}
	return low, false
}

// searchRecursively searches down the tree starting at the startNode (iteratively, despite the name).
// It returns the node containing key, the entry index within that node, and true.
// If the tree is empty or a leaf is reached without a match it returns (nil, -1, false).
func (tree *Tree[K, V]) searchRecursively(startNode *Node[K, V], key K) (node *Node[K, V], index int, found bool) {
	if tree.Empty() {
		return nil, -1, false
	}
	node = startNode
	for {
		index, found = tree.search(node, key)
		if found {
			return node, index, true
		}
		if tree.isLeaf(node) {
			return nil, -1, false
		}
		// On a miss, index is the position of the child to descend into.
		node = node.Children[index]
	}
}

// insert dispatches to the leaf or internal insertion routine depending on node.
// It returns true if a new entry was added, false if an existing entry was replaced.
func (tree *Tree[K, V]) insert(node *Node[K, V], entry *Entry[K, V]) (inserted bool) {
	if tree.isLeaf(node) {
		return tree.insertIntoLeaf(node, entry)
	}
	return tree.insertIntoInternal(node, entry)
}

// insertIntoLeaf places entry into the leaf node at its sorted position and then calls split on the node.
// If the key already exists the stored entry is replaced and false is returned.
func (tree *Tree[K, V]) insertIntoLeaf(node *Node[K, V], entry *Entry[K, V]) (inserted bool) {
	insertPosition, found := tree.search(node, entry.Key)
	if found {
		node.Entries[insertPosition] = entry
		return false
	}
	// Insert entry's key in the middle of the node:
	// grow by one slot, shift the tail right by one, then write the new entry into the gap.
	node.Entries = append(node.Entries, nil)
	copy(node.Entries[insertPosition+1:], node.Entries[insertPosition:])
	node.Entries[insertPosition] = entry
	tree.split(node)
	return true
}

// insertIntoInternal replaces the entry if the key lives in this internal node (returning false),
// otherwise continues the insertion in the child selected by search.
func (tree *Tree[K, V]) insertIntoInternal(node *Node[K, V], entry *Entry[K, V]) (inserted bool) {
	insertPosition, found := tree.search(node, entry.Key)
	if found {
		node.Entries[insertPosition] = entry
		return false
	}
	return tree.insert(node.Children[insertPosition], entry)
}

// split splits node if it holds more than maxEntries entries; otherwise it does nothing.
// The root is handled by splitRoot, every other node by splitNonRoot.
func (tree *Tree[K, V]) split(node *Node[K, V]) {
	if !tree.shouldSplit(node) {
		return
	}

	if node == tree.Root {
		tree.splitRoot()
		return
	}

	tree.splitNonRoot(node)
}

// splitNonRoot replaces node by two new nodes (left and right of the middle entry), moves the middle
// entry up into the parent, and then calls split on the parent, which may now hold too many entries.
func (tree *Tree[K, V]) splitNonRoot(node *Node[K, V]) {
	middle := tree.middle()
	parent := node.Parent

	// The halves are built on fresh backing arrays (append onto a nil slice), not on slices of node.Entries.
	left := &Node[K, V]{Entries: append([]*Entry[K, V](nil), node.Entries[:middle]...), Parent: parent}
	right := &Node[K, V]{Entries: append([]*Entry[K, V](nil), node.Entries[middle+1:]...), Parent: parent}

	// Move children from the node to be split into left and right nodes
	if !tree.isLeaf(node) {
		left.Children = append([]*Node[K, V](nil), node.Children[:middle+1]...)
		right.Children = append([]*Node[K, V](nil), node.Children[middle+1:]...)
		setParent(left.Children, left)
		setParent(right.Children, right)
	}

	// Position of the middle key among the parent's entries; the same index is then used as the
	// child slot that is overwritten with left (presumably the slot node occupied — this relies
	// on the parent's keys being ordered).
	insertPosition, _ := tree.search(parent, node.Entries[middle].Key)

	// Insert middle key into parent
	parent.Entries = append(parent.Entries, nil)
	copy(parent.Entries[insertPosition+1:], parent.Entries[insertPosition:])
	parent.Entries[insertPosition] = node.Entries[middle]

	// Set child left of inserted key in parent to the created left node
	parent.Children[insertPosition] = left

	// Set child right of inserted key in parent to the created right node
	parent.Children = append(parent.Children, nil)
	copy(parent.Children[insertPosition+2:], parent.Children[insertPosition+1:])
	parent.Children[insertPosition+1] = right

	tree.split(parent)
}

// splitRoot splits the current root into a left and a right node and installs a new root that holds
// only the middle entry with those two nodes as its children.
func (tree *Tree[K, V]) splitRoot() {
	middle := tree.middle()

	left := &Node[K, V]{Entries: append([]*Entry[K, V](nil), tree.Root.Entries[:middle]...)}
	right := &Node[K, V]{Entries: append([]*Entry[K, V](nil), tree.Root.Entries[middle+1:]...)}

	// Move children from the node to be split into left and right nodes
	if !tree.isLeaf(tree.Root) {
		left.Children = append([]*Node[K, V](nil), tree.Root.Children[:middle+1]...)
		right.Children = append([]*Node[K, V](nil), tree.Root.Children[middle+1:]...)
		setParent(left.Children, left)
		setParent(right.Children, right)
	}

	// Root is a node with one entry and two children (left and right)
	newRoot := &Node[K, V]{
		Entries:  []*Entry[K, V]{tree.Root.Entries[middle]},
		Children: []*Node[K, V]{left, right},
	}

	left.Parent = newRoot
	right.Parent = newRoot
	tree.Root = newRoot
}

// setParent points the Parent field of every node in nodes at parent.
func setParent[K, V any](nodes []*Node[K, V], parent *Node[K, V]) {
	for _, node := range nodes {
		node.Parent = parent
	}
}

// left follows the first child from node until it reaches a leaf and returns that leaf.
// It returns nil if the tree is empty.
func (tree *Tree[K, V]) left(node *Node[K, V]) *Node[K, V] {
	if tree.Empty() {
		return nil
	}
	current := node
	for {
		if tree.isLeaf(current) {
			return current
		}
		current = current.Children[0]
	}
}

// right follows the last child from node until it reaches a leaf and returns that leaf.
// It returns nil if the tree is empty.
func (tree *Tree[K, V]) right(node *Node[K, V]) *Node[K, V] {
	if tree.Empty() {
		return nil
	}
	current := node
	for {
		if tree.isLeaf(current) {
			return current
		}
		current = current.Children[len(current.Children)-1]
	}
}

// leftSibling returns the node's left sibling and child index (in parent) if it exists, otherwise (nil,-1)
// key is any of keys in node (could even be deleted).
func (tree *Tree[K, V]) leftSibling(node *Node[K, V], key K) (*Node[K, V], int) {
	if node.Parent != nil {
		// Searching the parent for key yields the child slot for that key; the sibling is one slot to the left.
		index, _ := tree.search(node.Parent, key)
		index--
		if index >= 0 && index < len(node.Parent.Children) {
			return node.Parent.Children[index], index
		}
	}
	return nil, -1
}

// rightSibling returns the node's right sibling and child index (in parent) if it exists, otherwise (nil,-1)
// key is any of keys in node (could even be deleted).
func (tree *Tree[K, V]) rightSibling(node *Node[K, V], key K) (*Node[K, V], int) {
	if node.Parent != nil {
		// Searching the parent for key yields the child slot for that key; the sibling is one slot to the right.
		index, _ := tree.search(node.Parent, key)
		index++
		if index < len(node.Parent.Children) {
			return node.Parent.Children[index], index
		}
	}
	return nil, -1
}

// delete deletes an entry in node at entries' index
// ref.: https://en.wikipedia.org/wiki/B-tree#Deletion
//
// For a leaf the entry is removed directly and the leaf is rebalanced. For an internal node the entry
// is overwritten with the largest entry of its left subtree, and that entry is then removed from the
// leaf it came from. The caller is responsible for adjusting tree.size.
func (tree *Tree[K, V]) delete(node *Node[K, V], index int) {
	// deleting from a leaf node
	if tree.isLeaf(node) {
		// Remember the key before removal: rebalance uses it to locate the node's siblings.
		deletedKey := node.Entries[index].Key
		tree.deleteEntry(node, index)
		tree.rebalance(node, deletedKey)
		// A root left without entries means the tree is now empty.
		if len(tree.Root.Entries) == 0 {
			tree.Root = nil
		}
		return
	}

	// deleting from an internal node
	leftLargestNode := tree.right(node.Children[index]) // largest node in the left sub-tree (assumed to exist)
	leftLargestEntryIndex := len(leftLargestNode.Entries) - 1
	node.Entries[index] = leftLargestNode.Entries[leftLargestEntryIndex]
	deletedKey := leftLargestNode.Entries[leftLargestEntryIndex].Key
	tree.deleteEntry(leftLargestNode, leftLargestEntryIndex)
	tree.rebalance(leftLargestNode, deletedKey)
}

// rebalance restores the minimum-entries property for node after a deletion; it returns nothing.
// It does nothing if node is nil or still holds at least minEntries entries. Otherwise it tries, in order:
// borrowing an entry from the left sibling, borrowing from the right sibling, merging with the right
// sibling, merging with the left sibling. After a merge it either promotes the merged node to root
// (when the root was its parent and ran out of entries) or recurses into the parent.
// Note that we first delete the entry and then call rebalance, thus the passed deleted key as reference.
func (tree *Tree[K, V]) rebalance(node *Node[K, V], deletedKey K) {
	// check if rebalancing is needed
	if node == nil || len(node.Entries) >= tree.minEntries() {
		return
	}

	// try to borrow from left sibling
	leftSibling, leftSiblingIndex := tree.leftSibling(node, deletedKey)
	if leftSibling != nil && len(leftSibling.Entries) > tree.minEntries() {
		// rotate right
		node.Entries = append([]*Entry[K, V]{node.Parent.Entries[leftSiblingIndex]}, node.Entries...) // prepend parent's separator entry to node's entries
		// The sibling's last entry becomes the new separator in the parent.
		node.Parent.Entries[leftSiblingIndex] = leftSibling.Entries[len(leftSibling.Entries)-1]
		tree.deleteEntry(leftSibling, len(leftSibling.Entries)-1)
		if !tree.isLeaf(leftSibling) {
			// The sibling's right-most child moves over to become node's first child.
			leftSiblingRightMostChild := leftSibling.Children[len(leftSibling.Children)-1]
			leftSiblingRightMostChild.Parent = node
			node.Children = append([]*Node[K, V]{leftSiblingRightMostChild}, node.Children...)
			tree.deleteChild(leftSibling, len(leftSibling.Children)-1)
		}
		return
	}

	// try to borrow from right sibling
	rightSibling, rightSiblingIndex := tree.rightSibling(node, deletedKey)
	if rightSibling != nil && len(rightSibling.Entries) > tree.minEntries() {
		// rotate left
		node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1]) // append parent's separator entry to node's entries
		// The sibling's first entry becomes the new separator in the parent.
		node.Parent.Entries[rightSiblingIndex-1] = rightSibling.Entries[0]
		tree.deleteEntry(rightSibling, 0)
		if !tree.isLeaf(rightSibling) {
			// The sibling's left-most child moves over to become node's last child.
			rightSiblingLeftMostChild := rightSibling.Children[0]
			rightSiblingLeftMostChild.Parent = node
			node.Children = append(node.Children, rightSiblingLeftMostChild)
			tree.deleteChild(rightSibling, 0)
		}
		return
	}

	// merge with siblings
	if rightSibling != nil {
		// merge with right sibling: node absorbs the separator and all of the sibling's entries and children
		node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1])
		node.Entries = append(node.Entries, rightSibling.Entries...)
		// The separator removed from the parent becomes the reference key for rebalancing the parent.
		deletedKey = node.Parent.Entries[rightSiblingIndex-1].Key
		tree.deleteEntry(node.Parent, rightSiblingIndex-1)
		tree.appendChildren(node.Parent.Children[rightSiblingIndex], node)
		tree.deleteChild(node.Parent, rightSiblingIndex)
	} else if leftSibling != nil {
		// merge with left sibling: node absorbs the sibling's entries, then the separator, ahead of its own
		entries := append([]*Entry[K, V](nil), leftSibling.Entries...)
		entries = append(entries, node.Parent.Entries[leftSiblingIndex])
		node.Entries = append(entries, node.Entries...)
		// The separator removed from the parent becomes the reference key for rebalancing the parent.
		deletedKey = node.Parent.Entries[leftSiblingIndex].Key
		tree.deleteEntry(node.Parent, leftSiblingIndex)
		tree.prependChildren(node.Parent.Children[leftSiblingIndex], node)
		tree.deleteChild(node.Parent, leftSiblingIndex)
	}

	// make the merged node the root if its parent was the root and the root is empty
	if node.Parent == tree.Root && len(tree.Root.Entries) == 0 {
		tree.Root = node
		node.Parent = nil
		return
	}

	// parent might underflow, so try to rebalance if necessary
	tree.rebalance(node.Parent, deletedKey)
}

// prependChildren puts fromNode's children in front of toNode's children and re-parents them to toNode.
// fromNode.Children itself is left untouched.
func (tree *Tree[K, V]) prependChildren(fromNode *Node[K, V], toNode *Node[K, V]) {
	// Build the combined list on a fresh backing array so fromNode's slice is not written to.
	children := append([]*Node[K, V](nil), fromNode.Children...)
	toNode.Children = append(children, toNode.Children...)
	setParent(fromNode.Children, toNode)
}

// appendChildren adds fromNode's children after toNode's children and re-parents them to toNode.
func (tree *Tree[K, V]) appendChildren(fromNode *Node[K, V], toNode *Node[K, V]) {
	toNode.Children = append(toNode.Children, fromNode.Children...)
	setParent(fromNode.Children, toNode)
}

// deleteEntry removes the entry at index from node.Entries in place, keeping the order of the rest.
// index is not bounds-checked here.
func (tree *Tree[K, V]) deleteEntry(node *Node[K, V], index int) {
	copy(node.Entries[index:], node.Entries[index+1:])
	// Clear the vacated tail slot so the backing array does not keep the pointer.
	node.Entries[len(node.Entries)-1] = nil
	node.Entries = node.Entries[:len(node.Entries)-1]
}

// deleteChild removes the child at index from node.Children in place, keeping the order of the rest.
// It is a no-op when index is at or beyond the number of children (e.g. for leaves).
func (tree *Tree[K, V]) deleteChild(node *Node[K, V], index int) {
	if index >= len(node.Children) {
		return
	}
	copy(node.Children[index:], node.Children[index+1:])
	// Clear the vacated tail slot so the backing array does not keep the pointer.
	node.Children[len(node.Children)-1] = nil
	node.Children = node.Children[:len(node.Children)-1]
}

// UnmarshalJSON @implements json.Unmarshaler
//
// The input is decoded as a JSON object into a map[string]V. On success the tree is cleared and each
// map key is converted to K through tree.Comparator.Unmarshal before being inserted with Put.
// If converting a key fails the error is returned immediately; at that point the tree has already
// been cleared and holds only the entries inserted so far.
// The parameter name shadows the imported bytes package inside this function.
func (tree *Tree[K, V]) UnmarshalJSON(bytes []byte) error {
	elements := make(map[string]V)
	err := json.Unmarshal(bytes, &elements)
	if err == nil {
		tree.Clear()
		for key, value := range elements {
			var nk K
			err = tree.Comparator.Unmarshal([]byte(key), &nk)
			if err != nil {
				return err
			}
			tree.Put(nk, value)
		}
	}
	return err
}

// MarshalJSON @implements json.Marshaler
//
// Every entry is copied into a map[string]V, with the key converted to its string form through
// tree.Comparator.Marshal, and the map is then encoded with json.Marshal. A key conversion error
// aborts the encoding and is returned.
func (tree *Tree[K, V]) MarshalJSON() ([]byte, error) {
	elements := make(map[string]V)
	it := tree.Iterator()
	for it.Next() {
		k, err := tree.Comparator.Marshal(it.Key())
		if err != nil {
			return nil, err
		}
		elements[string(k)] = it.Value()
	}
	return json.Marshal(&elements)
}