github.com/scottcagno/storage@v1.8.0/pkg/lsmtree/rbtree.go (about)

     1  package lsmtree
     2  
     3  import (
     4  	"bytes"
     5  	"runtime"
     6  	"strings"
     7  	"sync"
     8  )
     9  
    10  const (
    11  	colorRED uint8 = iota
    12  	colorBLK
    13  )
    14  
    15  var lock sync.RWMutex
    16  
    17  // rbNode is a node of a rbtree
    18  type rbNode struct {
    19  	left   *rbNode // left is a left child node
    20  	right  *rbNode // right is a right child node
    21  	parent *rbNode // parent is a parent node
    22  	color  uint8   // color is the color if this node
    23  	entry  *Entry  // entry is the data this node holds
    24  }
    25  
    26  // rbTree is a struct representing a rbTree
    27  type rbTree struct {
    28  	nilNode *rbNode // NIL is a "leaf"; end of the line
    29  	root    *rbNode // root is the head of the tree
    30  	count   int     // count is the number of items in the tree
    31  	size    int64   // size is the estimated size (in bytes) the tree is holding
    32  }
    33  
    34  // NewTree creates and returns a new rbTree
    35  func newRBTree() *rbTree {
    36  	n := &rbNode{
    37  		left:   nil,
    38  		right:  nil,
    39  		parent: nil,
    40  		color:  colorBLK,
    41  		entry:  nil,
    42  	}
    43  	return &rbTree{
    44  		nilNode: n,
    45  		root:    n,
    46  		count:   0,
    47  		size:    0,
    48  	}
    49  }
    50  
    51  // compare is the main comparator for the tree
    52  func compare(this, that *Entry) int {
    53  	return bytes.Compare(this.Key, that.Key)
    54  }
    55  
    56  // upsertAndCheckSize updates the provided entry if it already
    57  // exists or inserts the supplied entry as a new entry if it
    58  // does not exist. It returns the current size in bytes after
    59  // performing the insert or update. It also returns a boolean
    60  // reporting true if the tree has met or exceeded the provided
    61  // threshold, and false if the current size is less than the
    62  // provided threshold.
    63  func (t *rbTree) upsertAndCheckSize(entry *Entry, threshold int64) (int64, bool) {
    64  	// insert the entry in to the mem-table
    65  	t.putInternal(entry)
    66  	if t.size >= threshold {
    67  		// size is greater or equal to supplied threshold
    68  		// return size along with a true value (need flush)
    69  		return t.size, true
    70  	}
    71  	// size has not met or exceeded supplied threshold
    72  	// simply return the current size, and a false value
    73  	return t.size, false
    74  }
    75  
    76  // getNearMin performs an approximate search for the key of the
    77  // entry provided and returns the closest entry that contains a
    78  // key that is less than (the predecessor) the searched entry key
    79  // as well as a boolean reporting true if an exact match was found,
    80  // and false if it is unknown or an exact match was not found.
    81  func (t *rbTree) getNearMin(entry *Entry) (*Entry, bool) {
    82  	if entry == nil {
    83  		return nil, false
    84  	}
    85  	ret := t.searchApprox(&rbNode{
    86  		left:   t.nilNode,
    87  		right:  t.nilNode,
    88  		parent: t.nilNode,
    89  		color:  colorRED,
    90  		entry:  entry,
    91  	})
    92  	prev := t.predecessor(ret).entry
    93  	if prev == nil {
    94  		prev, _ = t.firstEntry()
    95  	}
    96  	return prev, compare(ret.entry, entry) == 0
    97  }
    98  
    99  // getNearMax performs an approximate search for the key of the
   100  // entry provided and returns the closest entry that contains a
   101  // key that is greater than (the successor) the searched entry key
   102  // as well as a boolean reporting true if an exact match was found,
   103  // and false if it is unknown or an exact match was not found.
   104  func (t *rbTree) getNearMax(entry *Entry) (*Entry, bool) {
   105  	if entry == nil {
   106  		return nil, false
   107  	}
   108  	ret := t.searchApprox(&rbNode{
   109  		left:   t.nilNode,
   110  		right:  t.nilNode,
   111  		parent: t.nilNode,
   112  		color:  colorRED,
   113  		entry:  entry,
   114  	})
   115  	next := t.successor(ret).entry
   116  	if next == nil {
   117  		next, _ = t.lastEntry()
   118  	}
   119  	return next, compare(ret.entry, entry) == 0
   120  }
   121  
   122  // hasEntry tests and returns a boolean value if the
   123  // provided key exists in the tree
   124  func (t *rbTree) hasEntry(entry *Entry) bool {
   125  	_, ok := t.getInternal(entry)
   126  	return ok
   127  }
   128  
   129  // addEntry adds the provided key and value only if it does not
   130  // already exist in the tree. It returns false if the key and
   131  // value was not able to be added, and true if it was added
   132  // successfully
   133  func (t *rbTree) addEntry(entry *Entry) bool {
   134  	_, ok := t.getInternal(entry)
   135  	if ok {
   136  		// key already exists, so we are not adding
   137  		return false
   138  	}
   139  	t.putInternal(entry)
   140  	return true
   141  }
   142  
   143  // putEntry acts as a regular upsert. It returns true if
   144  // the entry was updated and false if it was added.
   145  func (t *rbTree) putEntry(entry *Entry) (*Entry, bool) {
   146  	return t.putInternal(entry)
   147  }
   148  
   149  // getEntry attempts to locate the entry with the matching
   150  // the provided entry's key. It returns false if a matching
   151  // entry could not be found.
   152  func (t *rbTree) getEntry(entry *Entry) (*Entry, bool) {
   153  	return t.getInternal(entry)
   154  }
   155  
   156  // delEntry attempts to locate the entry matching the
   157  // provided entry's key and remove it from the tree.
   158  // It returns true if the correct entry was found and
   159  // removed and false if it could not be found or removed.
   160  func (t *rbTree) delEntry(entry *Entry) (*Entry, bool) {
   161  	return t.delInternal(entry)
   162  }
   163  
   164  // putInternal inserts and return the node along with a
   165  // boolean value signaling true if the node was updated,
   166  // and false if the node was a new addition.
   167  func (t *rbTree) putInternal(entry *Entry) (*Entry, bool) {
   168  	if entry == nil {
   169  		return nil, false
   170  	}
   171  	// insert return the node along with
   172  	// a boolean value signaling true if
   173  	// the node was updated, and false if
   174  	// the node was newly added.
   175  	ret, ok := t.insert(&rbNode{
   176  		left:   t.nilNode,
   177  		right:  t.nilNode,
   178  		parent: t.nilNode,
   179  		color:  colorRED,
   180  		entry:  entry,
   181  	})
   182  	return ret.entry, ok
   183  }
   184  
   185  // getInternal is the internal search wrapper. It
   186  // attempts to locate the entry with a matching key and
   187  // return it. If it succeeds it will return true, if it
   188  // cannot find a matching entry it will return false.
   189  func (t *rbTree) getInternal(entry *Entry) (*Entry, bool) {
   190  	if entry == nil {
   191  		return nil, false
   192  	}
   193  	ret := t.search(&rbNode{
   194  		left:   t.nilNode,
   195  		right:  t.nilNode,
   196  		parent: t.nilNode,
   197  		color:  colorRED,
   198  		entry:  entry,
   199  	})
   200  	return ret.entry, ret.entry != nil
   201  }
   202  
   203  // delInternal is the internal delete wrapper. It attempts
   204  // to locate the entry with the matching key and remove it
   205  // from the tree. It returns true if the correct entry was
   206  // found and removed; false if a  matching entry could not
   207  // be found or removed.
   208  func (t *rbTree) delInternal(entry *Entry) (*Entry, bool) {
   209  	if entry == nil {
   210  		return nil, false
   211  	}
   212  	cnt := t.count
   213  	ret := t.delete(&rbNode{
   214  		left:   t.nilNode,
   215  		right:  t.nilNode,
   216  		parent: t.nilNode,
   217  		color:  colorRED,
   218  		entry:  entry,
   219  	})
   220  	return ret.entry, cnt == t.count+1
   221  }
   222  
   223  // firstEntry returns the first (or min) entry
   224  func (t *rbTree) firstEntry() (*Entry, bool) {
   225  	x := t.min(t.root)
   226  	if x == t.nilNode {
   227  		return nil, false
   228  	}
   229  	return x.entry, true
   230  }
   231  
   232  // lastEntry returns the last (or max) entry
   233  func (t *rbTree) lastEntry() (*Entry, bool) {
   234  	x := t.max(t.root)
   235  	if x == t.nilNode {
   236  		return nil, false
   237  	}
   238  	return x.entry, true
   239  }
   240  
   241  // rangeFront calls f sequentially in a "forward" direction
   242  // for each entry present in the tree (going from min, to
   243  // max.) If f returns false, the iteration stops.
   244  func (t *rbTree) rangeFront(f func(entry *Entry) bool) {
   245  	t.ascend(t.root, t.min(t.root).entry, f)
   246  }
   247  
   248  // rangeFront calls f sequentially in a "reverse" direction
   249  // for each entry present in the tree (going from max, to
   250  // min.) If f returns false, the iteration stops.
   251  func (t *rbTree) rangeBack(f func(entry *Entry) bool) {
   252  	t.descend(t.root, t.max(t.root).entry, f)
   253  }
   254  
// sizeOfEntries returns the tree's running size counter: the
// estimated total size in bytes of the entries it is holding.
// The counter is maintained by insert and delete using Entry.Size.
func (t *rbTree) sizeOfEntries() int64 {
	return t.size
}
   260  
// countOfEntries returns the total number of entries currently
// in the tree. The counter is incremented by insert when a new
// node is added and decremented by delete.
func (t *rbTree) countOfEntries() int {
	return t.count
}
   266  
   267  // close is an internal close method
   268  // that frees up the tree.
   269  func (t *rbTree) close() {
   270  	t.nilNode = nil
   271  	t.root = nil
   272  	t.count = 0
   273  	return
   274  }
   275  
   276  // reset is an internal reset that wipes
   277  // the tree data and then "resets" it back
   278  // to a newly created state
   279  func (t *rbTree) reset() {
   280  	t.nilNode = nil
   281  	t.root = nil
   282  	t.count = 0
   283  	runtime.GC()
   284  	n := &rbNode{
   285  		left:   nil,
   286  		right:  nil,
   287  		parent: nil,
   288  		color:  colorBLK,
   289  		entry:  nil,
   290  	}
   291  	t.nilNode = n
   292  	t.root = n
   293  	t.count = 0
   294  	t.size = 0
   295  }
   296  
   297  // searchApprox will not return nil values and
   298  // instead will return approximate node matches
   299  // if it cannot find an exact match
   300  func (t *rbTree) searchApprox(x *rbNode) *rbNode {
   301  	p := t.root
   302  	for p != t.nilNode {
   303  		if compare(p.entry, x.entry) == -1 {
   304  			if p.right == t.nilNode {
   305  				break
   306  			}
   307  			p = p.right
   308  		} else if compare(x.entry, p.entry) == -1 {
   309  			if p.left == t.nilNode {
   310  				break
   311  			}
   312  			p = p.left
   313  		} else {
   314  			break
   315  		}
   316  	}
   317  	return p
   318  }
   319  
   320  // insert is the inner-most insert call for the tree.
   321  // It inserts the provided node, updating the entry
   322  // if it already exists, or adding a new one if it
   323  // is not currently in the tree. It returns true if
   324  // an existing entry was found and updated, and false
   325  // if an entry was simply added.
   326  func (t *rbTree) insert(z *rbNode) (*rbNode, bool) {
   327  	x := t.root
   328  	y := t.nilNode
   329  	for x != t.nilNode {
   330  		y = x
   331  		if compare(z.entry, x.entry) == -1 {
   332  			x = x.left
   333  		} else if compare(x.entry, z.entry) == -1 {
   334  			x = x.right
   335  		} else {
   336  			t.size -= int64(x.entry.Size())
   337  			t.size += int64(z.entry.Size())
   338  			// originally we were just returning x
   339  			// without updating the RBEntry, but if we
   340  			// want it to have similar behavior to
   341  			// a hashmap then we need to update any
   342  			// entries that already exist in the tree
   343  			x.entry = z.entry
   344  			return x, true // true means an existing
   345  			// value was found and updated. It should
   346  			// be noted that we don't need to re-balance
   347  			// the tree because they keys are not changing
   348  			// and the tree is balance is maintained by
   349  			// the keys and not their values.
   350  		}
   351  	}
   352  	z.parent = y
   353  	if y == t.nilNode {
   354  		t.root = z
   355  	} else if compare(z.entry, y.entry) == -1 {
   356  		y.left = z
   357  	} else {
   358  		y.right = z
   359  	}
   360  	t.count++
   361  	t.size += int64(z.entry.Size())
   362  	t.insertFixup(z)
   363  	return z, false
   364  }
   365  
   366  // leftRotate is the inner leftRotate method (standard)
   367  func (t *rbTree) leftRotate(x *rbNode) {
   368  	if x.right == t.nilNode {
   369  		return
   370  	}
   371  	y := x.right
   372  	x.right = y.left
   373  	if y.left != t.nilNode {
   374  		y.left.parent = x
   375  	}
   376  	y.parent = x.parent
   377  	if x.parent == t.nilNode {
   378  		t.root = y
   379  	} else if x == x.parent.left {
   380  		x.parent.left = y
   381  	} else {
   382  		x.parent.right = y
   383  	}
   384  	y.left = x
   385  	x.parent = y
   386  }
   387  
   388  // leftRotate is the inner leftRotate method (standard)
   389  func (t *rbTree) rightRotate(x *rbNode) {
   390  	if x.left == t.nilNode {
   391  		return
   392  	}
   393  	y := x.left
   394  	x.left = y.right
   395  	if y.right != t.nilNode {
   396  		y.right.parent = x
   397  	}
   398  	y.parent = x.parent
   399  
   400  	if x.parent == t.nilNode {
   401  		t.root = y
   402  	} else if x == x.parent.left {
   403  		x.parent.left = y
   404  	} else {
   405  		x.parent.right = y
   406  	}
   407  
   408  	y.right = x
   409  	x.parent = y
   410  }
   411  
// insertFixup is the internal fixup after insert. Starting at the
// newly linked node z, it loops for as long as z's parent is red,
// recoloring and rotating as needed, and finishes by coloring the
// root black. The two outer branches are mirror images of each
// other: the first handles a parent that is a left child, the
// second a parent that is a right child.
func (t *rbTree) insertFixup(z *rbNode) {
	for z.parent.color == colorRED {
		if z.parent == z.parent.parent.left {
			// y is the sibling of z's parent (the "uncle")
			y := z.parent.parent.right
			if y.color == colorRED {
				// red uncle: recolor parent and uncle black and the
				// grandparent red, then continue from the grandparent
				z.parent.color = colorBLK
				y.color = colorBLK
				z.parent.parent.color = colorRED
				z = z.parent.parent
			} else {
				if z == z.parent.right {
					// z is a right child of a left child: rotate at
					// the parent so z becomes a left child
					z = z.parent
					t.leftRotate(z)
				}
				// recolor and rotate the grandparent to the right
				z.parent.color = colorBLK
				z.parent.parent.color = colorRED
				t.rightRotate(z.parent.parent)
			}
		} else {
			// mirror image of the branch above
			y := z.parent.parent.left
			if y.color == colorRED {
				z.parent.color = colorBLK
				y.color = colorBLK
				z.parent.parent.color = colorRED
				z = z.parent.parent
			} else {
				if z == z.parent.left {
					z = z.parent
					t.rightRotate(z)
				}
				z.parent.color = colorBLK
				z.parent.parent.color = colorRED
				t.leftRotate(z.parent.parent)
			}
		}
	}
	// the root is always colored black on the way out
	t.root.color = colorBLK
}
   451  
   452  // search is the internal search method (standard)
   453  func (t *rbTree) search(x *rbNode) *rbNode {
   454  	p := t.root
   455  	for p != t.nilNode {
   456  		if compare(p.entry, x.entry) == -1 {
   457  			p = p.right
   458  		} else if compare(x.entry, p.entry) == -1 {
   459  			p = p.left
   460  		} else {
   461  			break
   462  		}
   463  	}
   464  	return p
   465  }
   466  
   467  // min traverses from root to left recursively until left is NIL
   468  func (t *rbTree) min(x *rbNode) *rbNode {
   469  	if x == t.nilNode {
   470  		return t.nilNode
   471  	}
   472  	for x.left != t.nilNode {
   473  		x = x.left
   474  	}
   475  	return x
   476  }
   477  
   478  // max traverses from root to right recursively until right is NIL
   479  func (t *rbTree) max(x *rbNode) *rbNode {
   480  	if x == t.nilNode {
   481  		return t.nilNode
   482  	}
   483  	for x.right != t.nilNode {
   484  		x = x.right
   485  	}
   486  	return x
   487  }
   488  
   489  // predecessor returns the first node that is less than the one provided.
   490  func (t *rbTree) predecessor(x *rbNode) *rbNode {
   491  	if x == t.nilNode {
   492  		return t.nilNode
   493  	}
   494  	if x.left != t.nilNode {
   495  		return t.max(x.left)
   496  	}
   497  	y := x.parent
   498  	for y != t.nilNode && x == y.left {
   499  		x = y
   500  		y = y.parent
   501  	}
   502  	return y
   503  }
   504  
   505  // successor returns the first node the is greater than the one provided.
   506  func (t *rbTree) successor(x *rbNode) *rbNode {
   507  	if x == t.nilNode {
   508  		return t.nilNode
   509  	}
   510  	if x.right != t.nilNode {
   511  		return t.min(x.right)
   512  	}
   513  	y := x.parent
   514  	for y != t.nilNode && x == y.right {
   515  		x = y
   516  		y = y.parent
   517  	}
   518  	return y
   519  }
   520  
   521  // delete is the internal delete method (standard)
   522  func (t *rbTree) delete(key *rbNode) *rbNode {
   523  	z := t.search(key)
   524  	if z == t.nilNode {
   525  		return t.nilNode
   526  	}
   527  	ret := &rbNode{t.nilNode, t.nilNode, t.nilNode, z.color, z.entry}
   528  	var y *rbNode
   529  	var x *rbNode
   530  	if z.left == t.nilNode || z.right == t.nilNode {
   531  		y = z
   532  	} else {
   533  		y = t.successor(z)
   534  	}
   535  	if y.left != t.nilNode {
   536  		x = y.left
   537  	} else {
   538  		x = y.right
   539  	}
   540  	x.parent = y.parent
   541  
   542  	if y.parent == t.nilNode {
   543  		t.root = x
   544  	} else if y == y.parent.left {
   545  		y.parent.left = x
   546  	} else {
   547  		y.parent.right = x
   548  	}
   549  	if y != z {
   550  		z.entry = y.entry
   551  	}
   552  	if y.color == colorBLK {
   553  		t.deleteFixup(x)
   554  	}
   555  	t.size -= int64(ret.entry.Size())
   556  	t.count--
   557  	return ret
   558  }
   559  
// deleteFixup is the internal fixup after delete. It is called by
// delete with the node x that replaced an unlinked black node. It
// loops while x is a black non-root node, recoloring and rotating
// around x's sibling w, and finishes by coloring x black. The two
// outer branches are mirror images: the first handles x as a left
// child, the second x as a right child.
func (t *rbTree) deleteFixup(x *rbNode) {
	for x != t.root && x.color == colorBLK {
		if x == x.parent.left {
			// w is x's sibling
			w := x.parent.right
			if w.color == colorRED {
				// red sibling: recolor and rotate so that x gets
				// a black sibling, then re-read the sibling
				w.color = colorBLK
				x.parent.color = colorRED
				t.leftRotate(x.parent)
				w = x.parent.right
			}
			if w.left.color == colorBLK && w.right.color == colorBLK {
				// both of the sibling's children are black: color
				// the sibling red and continue from the parent
				w.color = colorRED
				x = x.parent
			} else {
				if w.right.color == colorBLK {
					// only the sibling's far (right) child is black:
					// rotate at the sibling so the far child is red
					w.left.color = colorBLK
					w.color = colorRED
					t.rightRotate(w)
					w = x.parent.right
				}
				// sibling's far child is red: recolor, rotate at
				// the parent, and finish
				w.color = x.parent.color
				x.parent.color = colorBLK
				w.right.color = colorBLK
				t.leftRotate(x.parent)
				// this is to exit while loop
				x = t.root
			}
		} else {
			// mirror image of the branch above
			w := x.parent.left
			if w.color == colorRED {
				w.color = colorBLK
				x.parent.color = colorRED
				t.rightRotate(x.parent)
				w = x.parent.left
			}
			if w.left.color == colorBLK && w.right.color == colorBLK {
				w.color = colorRED
				x = x.parent
			} else {
				if w.left.color == colorBLK {
					w.right.color = colorBLK
					w.color = colorRED
					t.leftRotate(w)
					w = x.parent.left
				}
				w.color = x.parent.color
				x.parent.color = colorBLK
				w.left.color = colorBLK
				t.rightRotate(x.parent)
				// this is to exit while loop
				x = t.root
			}
		}
	}
	// x is always colored black on the way out
	x.color = colorBLK
}
   616  
   617  // ascend traverses the tree in ascending entry order
   618  func (t *rbTree) ascend(x *rbNode, entry *Entry, f func(e *Entry) bool) bool {
   619  	if x == t.nilNode {
   620  		return true
   621  	}
   622  	if !(compare(x.entry, entry) == -1) {
   623  		if !t.ascend(x.left, entry, f) {
   624  			return false
   625  		}
   626  		if !f(x.entry) {
   627  			return false
   628  		}
   629  	}
   630  	return t.ascend(x.right, entry, f)
   631  }
   632  
   633  // descend traverses the tree in descending entry order
   634  func (t *rbTree) descend(x *rbNode, pivot *Entry, f func(e *Entry) bool) bool {
   635  	if x == t.nilNode {
   636  		return true
   637  	}
   638  	if !(compare(pivot, x.entry) == -1) {
   639  		if !t.descend(x.right, pivot, f) {
   640  			return false
   641  		}
   642  		if !f(x.entry) {
   643  			return false
   644  		}
   645  	}
   646  	return t.descend(x.left, pivot, f)
   647  }
   648  
   649  // ascendRange traverses the tree in ascending entry order within the bounds
   650  // of the inferior to the superior entries provided
   651  func (t *rbTree) ascendRange(x *rbNode, inf, sup *Entry, f func(e *Entry) bool) bool {
   652  	if x == t.nilNode {
   653  		return true
   654  	}
   655  	if !(compare(x.entry, sup) == -1) {
   656  		return t.ascendRange(x.left, inf, sup, f)
   657  	}
   658  	if compare(x.entry, inf) == -1 {
   659  		return t.ascendRange(x.right, inf, sup, f)
   660  	}
   661  	if !t.ascendRange(x.left, inf, sup, f) {
   662  		return false
   663  	}
   664  	if !f(x.entry) {
   665  		return false
   666  	}
   667  	return t.ascendRange(x.right, inf, sup, f)
   668  }
   669  
// Lock acquires the write lock on the package-level lock variable.
// Note that lock is declared at package scope rather than on the
// tree, so it is not specific to the receiver t.
func (t *rbTree) Lock() {
	lock.Lock()
}
   673  
// Unlock releases the write lock on the package-level lock variable
// that was acquired through Lock.
func (t *rbTree) Unlock() {
	lock.Unlock()
}
   677  
// MarshalBinary is a placeholder that is not implemented yet. It
// holds the package-level write lock for the duration of the call
// and currently always returns a nil slice and a nil error.
func (t *rbTree) MarshalBinary() ([]byte, error) {
	// lock in this case (for now)
	lock.Lock()
	defer lock.Unlock()
	// TODO: implement...
	return nil, nil
}
   685  
// UnmarshalBinary is a placeholder that is not implemented yet. It
// holds the package-level write lock for the duration of the call,
// ignores data, and currently always returns a nil error.
func (t *rbTree) UnmarshalBinary(data []byte) error {
	// lock in this case (for now)
	lock.Lock()
	defer lock.Unlock()
	// TODO: implement...
	return nil
}
   693  
   694  func (t *rbTree) String() string {
   695  	var sb strings.Builder
   696  	t.ascend(t.root, t.min(t.root).entry, func(entry *Entry) bool {
   697  		sb.WriteString(entry.String())
   698  		return true
   699  	})
   700  	return sb.String()
   701  }