github.com/mymmsc/gox@v1.3.33/util/btree/btree.go (about)

     1  // Copyright (c) 2015, Emir Pasic. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package btree implements a B tree.
     6  //
     7  // According to Knuth's definition, a B-tree of order m is a tree which satisfies the following properties:
     8  // - Every node has at most m children.
     9  // - Every non-leaf node (except root) has at least ⌈m/2⌉ children.
    10  // - The root has at least two children if it is not a leaf node.
    11  // - A non-leaf node with k children contains k−1 keys.
    12  // - All leaves appear in the same level
    13  //
    14  // Structure is not thread safe.
    15  //
    16  // References: https://en.wikipedia.org/wiki/B-tree
    17  package btree
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"github.com/mymmsc/gox/util"
    23  	"strings"
    24  )
    25  
// assertTreeImplementation exists only so the compiler checks that *Tree
// satisfies the util.Tree interface; the blank assignment has no runtime effect.
func assertTreeImplementation() {
	var _ util.Tree = (*Tree)(nil)
}
    29  
// Tree holds elements of the B-tree.
//
// Root is nil until the first key is inserted and is reset to nil by Clear or
// when the last key is removed.
type Tree struct {
	Root       *Node           // Root node (nil while the tree holds no keys)
	Comparator util.Comparator // Key comparator used to order and look up keys
	size       int             // Total number of keys in the tree
	m          int             // order (maximum number of children)
}
    37  
// Node is a single element within the tree.
// A node with no children is treated as a leaf.
type Node struct {
	Parent   *Node    // Parent node (nil for the root)
	Entries  []*Entry // Contained keys in node
	Children []*Node  // Children nodes (empty for leaves)
}
    44  
// Entry represents the key-value pair contained within nodes.
type Entry struct {
	Key   interface{} // Key the entry is ordered and looked up by
	Value interface{} // Value stored alongside the key
}
    50  
    51  // NewWith instantiates a B-tree with the order (maximum number of children) and a custom key comparator.
    52  func NewWith(order int, comparator util.Comparator) *Tree {
    53  	if order < 3 {
    54  		panic("Invalid order, should be at least 3")
    55  	}
    56  	return &Tree{m: order, Comparator: comparator}
    57  }
    58  
    59  // NewWithIntComparator instantiates a B-tree with the order (maximum number of children) and the IntComparator, i.e. keys are of type int.
    60  func NewWithIntComparator(order int) *Tree {
    61  	return NewWith(order, util.IntComparator)
    62  }
    63  
    64  // NewWithStringComparator instantiates a B-tree with the order (maximum number of children) and the StringComparator, i.e. keys are of type string.
    65  func NewWithStringComparator(order int) *Tree {
    66  	return NewWith(order, util.StringComparator)
    67  }
    68  
    69  // Put inserts key-value pair node into the tree.
    70  // If key already exists, then its value is updated with the new value.
    71  // Key should adhere to the comparator's type assertion, otherwise method panics.
    72  func (tree *Tree) Put(key interface{}, value interface{}) {
    73  	entry := &Entry{Key: key, Value: value}
    74  
    75  	if tree.Root == nil {
    76  		tree.Root = &Node{Entries: []*Entry{entry}, Children: []*Node{}}
    77  		tree.size++
    78  		return
    79  	}
    80  
    81  	if tree.insert(tree.Root, entry) {
    82  		tree.size++
    83  	}
    84  }
    85  
    86  // Get searches the node in the tree by key and returns its value or nil if key is not found in tree.
    87  // Second return parameter is true if key was found, otherwise false.
    88  // Key should adhere to the comparator's type assertion, otherwise method panics.
    89  func (tree *Tree) Get(key interface{}) (value interface{}, found bool) {
    90  	node, index, found := tree.searchRecursively(tree.Root, key)
    91  	if found {
    92  		return node.Entries[index].Value, true
    93  	}
    94  	return nil, false
    95  }
    96  
    97  // Remove remove the node from the tree by key.
    98  // Key should adhere to the comparator's type assertion, otherwise method panics.
    99  func (tree *Tree) Remove(key interface{}) {
   100  	node, index, found := tree.searchRecursively(tree.Root, key)
   101  	if found {
   102  		tree.delete(node, index)
   103  		tree.size--
   104  	}
   105  }
   106  
   107  // Empty returns true if tree does not contain any nodes
   108  func (tree *Tree) Empty() bool {
   109  	return tree.size == 0
   110  }
   111  
// Size returns the number of keys (entries) stored in the tree.
func (tree *Tree) Size() int {
	return tree.size
}
   116  
   117  // Keys returns all keys in-order
   118  func (tree *Tree) Keys() []interface{} {
   119  	keys := make([]interface{}, tree.size)
   120  	it := tree.Iterator()
   121  	for i := 0; it.Next(); i++ {
   122  		keys[i] = it.Key()
   123  	}
   124  	return keys
   125  }
   126  
   127  // Values returns all values in-order based on the key.
   128  func (tree *Tree) Values() []interface{} {
   129  	values := make([]interface{}, tree.size)
   130  	it := tree.Iterator()
   131  	for i := 0; it.Next(); i++ {
   132  		values[i] = it.Value()
   133  	}
   134  	return values
   135  }
   136  
   137  // Clear removes all nodes from the tree.
   138  func (tree *Tree) Clear() {
   139  	tree.Root = nil
   140  	tree.size = 0
   141  }
   142  
   143  // Height returns the height of the tree.
   144  func (tree *Tree) Height() int {
   145  	return tree.Root.height()
   146  }
   147  
   148  // Left returns the left-most (min) node or nil if tree is empty.
   149  func (tree *Tree) Left() *Node {
   150  	return tree.left(tree.Root)
   151  }
   152  
   153  // LeftKey returns the left-most (min) key or nil if tree is empty.
   154  func (tree *Tree) LeftKey() interface{} {
   155  	if left := tree.Left(); left != nil {
   156  		return left.Entries[0].Key
   157  	}
   158  	return nil
   159  }
   160  
   161  // LeftValue returns the left-most value or nil if tree is empty.
   162  func (tree *Tree) LeftValue() interface{} {
   163  	if left := tree.Left(); left != nil {
   164  		return left.Entries[0].Value
   165  	}
   166  	return nil
   167  }
   168  
   169  // Right returns the right-most (max) node or nil if tree is empty.
   170  func (tree *Tree) Right() *Node {
   171  	return tree.right(tree.Root)
   172  }
   173  
   174  // RightKey returns the right-most (max) key or nil if tree is empty.
   175  func (tree *Tree) RightKey() interface{} {
   176  	if right := tree.Right(); right != nil {
   177  		return right.Entries[len(right.Entries)-1].Key
   178  	}
   179  	return nil
   180  }
   181  
   182  // RightValue returns the right-most value or nil if tree is empty.
   183  func (tree *Tree) RightValue() interface{} {
   184  	if right := tree.Right(); right != nil {
   185  		return right.Entries[len(right.Entries)-1].Value
   186  	}
   187  	return nil
   188  }
   189  
   190  // String returns a string representation of container (for debugging purposes)
   191  func (tree *Tree) String() string {
   192  	var buffer bytes.Buffer
   193  	if _, err := buffer.WriteString("BTree\n"); err != nil {
   194  	}
   195  	if !tree.Empty() {
   196  		tree.output(&buffer, tree.Root, 0, true)
   197  	}
   198  	return buffer.String()
   199  }
   200  
   201  func (entry *Entry) String() string {
   202  	return fmt.Sprintf("%v", entry.Key)
   203  }
   204  
   205  func (tree *Tree) output(buffer *bytes.Buffer, node *Node, level int, isTail bool) {
   206  	for e := 0; e < len(node.Entries)+1; e++ {
   207  		if e < len(node.Children) {
   208  			tree.output(buffer, node.Children[e], level+1, true)
   209  		}
   210  		if e < len(node.Entries) {
   211  			if _, err := buffer.WriteString(strings.Repeat("    ", level)); err != nil {
   212  			}
   213  			if _, err := buffer.WriteString(fmt.Sprintf("%v", node.Entries[e].Key) + "\n"); err != nil {
   214  			}
   215  		}
   216  	}
   217  }
   218  
   219  func (node *Node) height() int {
   220  	height := 0
   221  	for ; node != nil; node = node.Children[0] {
   222  		height++
   223  		if len(node.Children) == 0 {
   224  			break
   225  		}
   226  	}
   227  	return height
   228  }
   229  
// isLeaf reports whether node has no children.
func (tree *Tree) isLeaf(node *Node) bool {
	return len(node.Children) == 0
}
   233  
   234  func (tree *Tree) isFull(node *Node) bool {
   235  	return len(node.Entries) == tree.maxEntries()
   236  }
   237  
   238  func (tree *Tree) shouldSplit(node *Node) bool {
   239  	return len(node.Entries) > tree.maxEntries()
   240  }
   241  
// maxChildren returns the order of the tree, i.e. the maximum number of
// children a node may have.
func (tree *Tree) maxChildren() int {
	return tree.m
}
   245  
   246  func (tree *Tree) minChildren() int {
   247  	return (tree.m + 1) / 2 // ceil(m/2)
   248  }
   249  
   250  func (tree *Tree) maxEntries() int {
   251  	return tree.maxChildren() - 1
   252  }
   253  
   254  func (tree *Tree) minEntries() int {
   255  	return tree.minChildren() - 1
   256  }
   257  
   258  func (tree *Tree) middle() int {
   259  	return (tree.m - 1) / 2 // "-1" to favor right nodes to have more keys when splitting
   260  }
   261  
   262  // search searches only within the single node among its entries
   263  func (tree *Tree) search(node *Node, key interface{}) (index int, found bool) {
   264  	low, high := 0, len(node.Entries)-1
   265  	var mid int
   266  	for low <= high {
   267  		mid = (high + low) / 2
   268  		compare := tree.Comparator(key, node.Entries[mid].Key)
   269  		switch {
   270  		case compare > 0:
   271  			low = mid + 1
   272  		case compare < 0:
   273  			high = mid - 1
   274  		case compare == 0:
   275  			return mid, true
   276  		}
   277  	}
   278  	return low, false
   279  }
   280  
   281  // searchRecursively searches recursively down the tree starting at the startNode
   282  func (tree *Tree) searchRecursively(startNode *Node, key interface{}) (node *Node, index int, found bool) {
   283  	if tree.Empty() {
   284  		return nil, -1, false
   285  	}
   286  	node = startNode
   287  	for {
   288  		index, found = tree.search(node, key)
   289  		if found {
   290  			return node, index, true
   291  		}
   292  		if tree.isLeaf(node) {
   293  			return nil, -1, false
   294  		}
   295  		node = node.Children[index]
   296  	}
   297  }
   298  
   299  func (tree *Tree) insert(node *Node, entry *Entry) (inserted bool) {
   300  	if tree.isLeaf(node) {
   301  		return tree.insertIntoLeaf(node, entry)
   302  	}
   303  	return tree.insertIntoInternal(node, entry)
   304  }
   305  
   306  func (tree *Tree) insertIntoLeaf(node *Node, entry *Entry) (inserted bool) {
   307  	insertPosition, found := tree.search(node, entry.Key)
   308  	if found {
   309  		node.Entries[insertPosition] = entry
   310  		return false
   311  	}
   312  	// Insert entry's key in the middle of the node
   313  	node.Entries = append(node.Entries, nil)
   314  	copy(node.Entries[insertPosition+1:], node.Entries[insertPosition:])
   315  	node.Entries[insertPosition] = entry
   316  	tree.split(node)
   317  	return true
   318  }
   319  
   320  func (tree *Tree) insertIntoInternal(node *Node, entry *Entry) (inserted bool) {
   321  	insertPosition, found := tree.search(node, entry.Key)
   322  	if found {
   323  		node.Entries[insertPosition] = entry
   324  		return false
   325  	}
   326  	return tree.insert(node.Children[insertPosition], entry)
   327  }
   328  
   329  func (tree *Tree) split(node *Node) {
   330  	if !tree.shouldSplit(node) {
   331  		return
   332  	}
   333  
   334  	if node == tree.Root {
   335  		tree.splitRoot()
   336  		return
   337  	}
   338  
   339  	tree.splitNonRoot(node)
   340  }
   341  
   342  func (tree *Tree) splitNonRoot(node *Node) {
   343  	middle := tree.middle()
   344  	parent := node.Parent
   345  
   346  	left := &Node{Entries: append([]*Entry(nil), node.Entries[:middle]...), Parent: parent}
   347  	right := &Node{Entries: append([]*Entry(nil), node.Entries[middle+1:]...), Parent: parent}
   348  
   349  	// Move children from the node to be split into left and right nodes
   350  	if !tree.isLeaf(node) {
   351  		left.Children = append([]*Node(nil), node.Children[:middle+1]...)
   352  		right.Children = append([]*Node(nil), node.Children[middle+1:]...)
   353  		setParent(left.Children, left)
   354  		setParent(right.Children, right)
   355  	}
   356  
   357  	insertPosition, _ := tree.search(parent, node.Entries[middle].Key)
   358  
   359  	// Insert middle key into parent
   360  	parent.Entries = append(parent.Entries, nil)
   361  	copy(parent.Entries[insertPosition+1:], parent.Entries[insertPosition:])
   362  	parent.Entries[insertPosition] = node.Entries[middle]
   363  
   364  	// Set child left of inserted key in parent to the created left node
   365  	parent.Children[insertPosition] = left
   366  
   367  	// Set child right of inserted key in parent to the created right node
   368  	parent.Children = append(parent.Children, nil)
   369  	copy(parent.Children[insertPosition+2:], parent.Children[insertPosition+1:])
   370  	parent.Children[insertPosition+1] = right
   371  
   372  	tree.split(parent)
   373  }
   374  
   375  func (tree *Tree) splitRoot() {
   376  	middle := tree.middle()
   377  
   378  	left := &Node{Entries: append([]*Entry(nil), tree.Root.Entries[:middle]...)}
   379  	right := &Node{Entries: append([]*Entry(nil), tree.Root.Entries[middle+1:]...)}
   380  
   381  	// Move children from the node to be split into left and right nodes
   382  	if !tree.isLeaf(tree.Root) {
   383  		left.Children = append([]*Node(nil), tree.Root.Children[:middle+1]...)
   384  		right.Children = append([]*Node(nil), tree.Root.Children[middle+1:]...)
   385  		setParent(left.Children, left)
   386  		setParent(right.Children, right)
   387  	}
   388  
   389  	// Root is a node with one entry and two children (left and right)
   390  	newRoot := &Node{
   391  		Entries:  []*Entry{tree.Root.Entries[middle]},
   392  		Children: []*Node{left, right},
   393  	}
   394  
   395  	left.Parent = newRoot
   396  	right.Parent = newRoot
   397  	tree.Root = newRoot
   398  }
   399  
   400  func setParent(nodes []*Node, parent *Node) {
   401  	for _, node := range nodes {
   402  		node.Parent = parent
   403  	}
   404  }
   405  
   406  func (tree *Tree) left(node *Node) *Node {
   407  	if tree.Empty() {
   408  		return nil
   409  	}
   410  	current := node
   411  	for {
   412  		if tree.isLeaf(current) {
   413  			return current
   414  		}
   415  		current = current.Children[0]
   416  	}
   417  }
   418  
   419  func (tree *Tree) right(node *Node) *Node {
   420  	if tree.Empty() {
   421  		return nil
   422  	}
   423  	current := node
   424  	for {
   425  		if tree.isLeaf(current) {
   426  			return current
   427  		}
   428  		current = current.Children[len(current.Children)-1]
   429  	}
   430  }
   431  
   432  // leftSibling returns the node's left sibling and child index (in parent) if it exists, otherwise (nil,-1)
   433  // key is any of keys in node (could even be deleted).
   434  func (tree *Tree) leftSibling(node *Node, key interface{}) (*Node, int) {
   435  	if node.Parent != nil {
   436  		index, _ := tree.search(node.Parent, key)
   437  		index--
   438  		if index >= 0 && index < len(node.Parent.Children) {
   439  			return node.Parent.Children[index], index
   440  		}
   441  	}
   442  	return nil, -1
   443  }
   444  
   445  // rightSibling returns the node's right sibling and child index (in parent) if it exists, otherwise (nil,-1)
   446  // key is any of keys in node (could even be deleted).
   447  func (tree *Tree) rightSibling(node *Node, key interface{}) (*Node, int) {
   448  	if node.Parent != nil {
   449  		index, _ := tree.search(node.Parent, key)
   450  		index++
   451  		if index < len(node.Parent.Children) {
   452  			return node.Parent.Children[index], index
   453  		}
   454  	}
   455  	return nil, -1
   456  }
   457  
   458  // delete deletes an entry in node at entries' index
   459  // ref.: https://en.wikipedia.org/wiki/B-tree#Deletion
   460  func (tree *Tree) delete(node *Node, index int) {
   461  	// deleting from a leaf node
   462  	if tree.isLeaf(node) {
   463  		deletedKey := node.Entries[index].Key
   464  		tree.deleteEntry(node, index)
   465  		tree.rebalance(node, deletedKey)
   466  		if len(tree.Root.Entries) == 0 {
   467  			tree.Root = nil
   468  		}
   469  		return
   470  	}
   471  
   472  	// deleting from an internal node
   473  	leftLargestNode := tree.right(node.Children[index]) // largest node in the left sub-tree (assumed to exist)
   474  	leftLargestEntryIndex := len(leftLargestNode.Entries) - 1
   475  	node.Entries[index] = leftLargestNode.Entries[leftLargestEntryIndex]
   476  	deletedKey := leftLargestNode.Entries[leftLargestEntryIndex].Key
   477  	tree.deleteEntry(leftLargestNode, leftLargestEntryIndex)
   478  	tree.rebalance(leftLargestNode, deletedKey)
   479  }
   480  
// rebalance restores the minimum-entries invariant for node after an entry has
// been removed from it. It does nothing when node is nil or still holds at
// least minEntries() entries. Otherwise it tries, in this order: borrowing an
// entry from the left sibling, borrowing an entry from the right sibling, and
// finally merging with a sibling, after which the parent is rebalanced in turn.
//
// Note that we first delete the entry and then call rebalance, thus the passed
// deleted key as reference: it is used to locate node's position in its parent.
func (tree *Tree) rebalance(node *Node, deletedKey interface{}) {
	// check if rebalancing is needed
	if node == nil || len(node.Entries) >= tree.minEntries() {
		return
	}

	// try to borrow from left sibling
	leftSibling, leftSiblingIndex := tree.leftSibling(node, deletedKey)
	if leftSibling != nil && len(leftSibling.Entries) > tree.minEntries() {
		// rotate right
		node.Entries = append([]*Entry{node.Parent.Entries[leftSiblingIndex]}, node.Entries...) // prepend parent's separator entry to node's entries
		node.Parent.Entries[leftSiblingIndex] = leftSibling.Entries[len(leftSibling.Entries)-1]
		tree.deleteEntry(leftSibling, len(leftSibling.Entries)-1)
		if !tree.isLeaf(leftSibling) {
			// the sibling's right-most child moves over together with the borrowed entry
			leftSiblingRightMostChild := leftSibling.Children[len(leftSibling.Children)-1]
			leftSiblingRightMostChild.Parent = node
			node.Children = append([]*Node{leftSiblingRightMostChild}, node.Children...)
			tree.deleteChild(leftSibling, len(leftSibling.Children)-1)
		}
		return
	}

	// try to borrow from right sibling
	rightSibling, rightSiblingIndex := tree.rightSibling(node, deletedKey)
	if rightSibling != nil && len(rightSibling.Entries) > tree.minEntries() {
		// rotate left
		node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1]) // append parent's separator entry to node's entries
		node.Parent.Entries[rightSiblingIndex-1] = rightSibling.Entries[0]
		tree.deleteEntry(rightSibling, 0)
		if !tree.isLeaf(rightSibling) {
			// the sibling's left-most child moves over together with the borrowed entry
			rightSiblingLeftMostChild := rightSibling.Children[0]
			rightSiblingLeftMostChild.Parent = node
			node.Children = append(node.Children, rightSiblingLeftMostChild)
			tree.deleteChild(rightSibling, 0)
		}
		return
	}

	// merge with siblings
	if rightSibling != nil {
		// merge with right sibling: node absorbs the separator and the sibling's entries and children
		node.Entries = append(node.Entries, node.Parent.Entries[rightSiblingIndex-1])
		node.Entries = append(node.Entries, rightSibling.Entries...)
		deletedKey = node.Parent.Entries[rightSiblingIndex-1].Key // the separator is what the parent loses
		tree.deleteEntry(node.Parent, rightSiblingIndex-1)
		tree.appendChildren(node.Parent.Children[rightSiblingIndex], node)
		tree.deleteChild(node.Parent, rightSiblingIndex)
	} else if leftSibling != nil {
		// merge with left sibling: node absorbs the sibling's entries, the separator, and the sibling's children
		entries := append([]*Entry(nil), leftSibling.Entries...)
		entries = append(entries, node.Parent.Entries[leftSiblingIndex])
		node.Entries = append(entries, node.Entries...)
		deletedKey = node.Parent.Entries[leftSiblingIndex].Key // the separator is what the parent loses
		tree.deleteEntry(node.Parent, leftSiblingIndex)
		tree.prependChildren(node.Parent.Children[leftSiblingIndex], node)
		tree.deleteChild(node.Parent, leftSiblingIndex)
	}

	// make the merged node the root if its parent was the root and the root is empty
	if node.Parent == tree.Root && len(tree.Root.Entries) == 0 {
		tree.Root = node
		node.Parent = nil
		return
	}

	// parent might underflow, so try to rebalance if necessary
	tree.rebalance(node.Parent, deletedKey)
}
   551  
   552  func (tree *Tree) prependChildren(fromNode *Node, toNode *Node) {
   553  	children := append([]*Node(nil), fromNode.Children...)
   554  	toNode.Children = append(children, toNode.Children...)
   555  	setParent(fromNode.Children, toNode)
   556  }
   557  
   558  func (tree *Tree) appendChildren(fromNode *Node, toNode *Node) {
   559  	toNode.Children = append(toNode.Children, fromNode.Children...)
   560  	setParent(fromNode.Children, toNode)
   561  }
   562  
   563  func (tree *Tree) deleteEntry(node *Node, index int) {
   564  	copy(node.Entries[index:], node.Entries[index+1:])
   565  	node.Entries[len(node.Entries)-1] = nil
   566  	node.Entries = node.Entries[:len(node.Entries)-1]
   567  }
   568  
   569  func (tree *Tree) deleteChild(node *Node, index int) {
   570  	if index >= len(node.Children) {
   571  		return
   572  	}
   573  	copy(node.Children[index:], node.Children[index+1:])
   574  	node.Children[len(node.Children)-1] = nil
   575  	node.Children = node.Children[:len(node.Children)-1]
   576  }