github.com/scottcagno/storage@v1.8.0/pkg/lsmt/mtbl/rbtree.go (about)

     1  package mtbl
     2  
     3  import (
     4  	"bytes"
     5  	"github.com/scottcagno/storage/pkg/lsmt/binary"
     6  	"runtime"
     7  	"strings"
     8  )
     9  
    10  var empty *binary.Entry = nil
    11  
    12  func compare(this, that *binary.Entry) int {
    13  	return bytes.Compare(this.Key, that.Key)
    14  }
    15  
    16  const (
    17  	RED   = 0
    18  	BLACK = 1
    19  )
    20  
    21  type rbNode struct {
    22  	left   *rbNode
    23  	right  *rbNode
    24  	parent *rbNode
    25  	color  uint
    26  	entry  *binary.Entry
    27  }
    28  
// RBTree is an exported alias for rbTree.
type RBTree = rbTree
    30  
    31  // rbTree is a struct representing a rbTree
    32  type rbTree struct {
    33  	NIL   *rbNode
    34  	root  *rbNode
    35  	count int
    36  	size  int64
    37  }
    38  
    39  func NewRBTree() *rbTree {
    40  	return newRBTree()
    41  }
    42  
    43  // NewTree creates and returns a new rbTree
    44  func newRBTree() *rbTree {
    45  	n := &rbNode{
    46  		left:   nil,
    47  		right:  nil,
    48  		parent: nil,
    49  		color:  BLACK,
    50  		entry:  empty,
    51  	}
    52  	return &rbTree{
    53  		NIL:   n,
    54  		root:  n,
    55  		count: 0,
    56  		size:  0,
    57  	}
    58  }
    59  
    60  func (t *rbTree) Count() int {
    61  	return t.count
    62  }
    63  
    64  // Has tests and returns a boolean value if the
    65  // provided key exists in the tree
    66  func (t *rbTree) Has(entry *binary.Entry) bool {
    67  	_, ok := t.getInternal(entry)
    68  	return ok
    69  }
    70  
    71  // HasKey tests and returns a boolean value if the
    72  // provided key exists in the tree
    73  func (t *rbTree) HasKey(k string) bool {
    74  	e, ok := t.getInternal(&binary.Entry{Key: []byte(k)})
    75  	return ok && e != nil && e.Value != nil
    76  }
    77  
    78  // Add adds the provided key and value only if it does not
    79  // already exist in the tree. It returns false if the key and
    80  // value was not able to be added, and true if it was added
    81  // successfully
    82  func (t *rbTree) Add(entry *binary.Entry) bool {
    83  	_, ok := t.getInternal(entry)
    84  	if ok {
    85  		// key already exists, so we are not adding
    86  		return false
    87  	}
    88  	t.putInternal(entry)
    89  	return true
    90  }
    91  
    92  func (t *rbTree) Put(entry *binary.Entry) (*binary.Entry, bool) {
    93  	return t.putInternal(entry)
    94  }
    95  
    96  // UpsertAndCheckIfFull updates the provided entry if it already
    97  // exists or inserts the supplied entry as a new entry if it
    98  // does not exist. UpsertAndCheckIfFull returns the current size
    99  // in bytes after performing the insert or update. It also returns
   100  // a boolean reporting true if the tree has met or exceeded the
   101  // provided threshold, and false if the current size is less than
   102  // the provided threshold.
   103  func (t *rbTree) UpsertAndCheckIfFull(entry *binary.Entry, threshold int64) (int64, bool) {
   104  	// TODO: possibly perform pre-check in future somehow??
   105  	//
   106  	// insert the entry in to the mem-table
   107  	t.putInternal(entry)
   108  	if t.size >= threshold {
   109  		// size is greater or equal to supplied threshold
   110  		// return size along with a true value (need flush)
   111  		return t.size, true
   112  	}
   113  	// size has not met or exceeded supplied threshold
   114  	// simply return the current size, and a false value
   115  	return t.size, false
   116  }
   117  
   118  // UpsertBatchAndCheckIfFull ranges the batch of entries, and it
   119  // updates the provided entry if it already exists or inserts the
   120  // supplied entry as a new entry if it does not exist. When it's
   121  // finished, UpsertBatchAndCheckIfFull returns the current size in
   122  // bytes after performing the insert or update. It also returns a
   123  // boolean value reporting true if the tree has met or exceeded the
   124  // provided threshold, and false if the current size is less than
   125  // the provided threshold.
   126  func (t *rbTree) UpsertBatchAndCheckIfFull(batch *binary.Batch, threshold int64) (int64, bool) {
   127  	// TODO: possibly perform pre-check in future somehow??
   128  	//
   129  	// range the batch entries
   130  	for _, e := range batch.Entries {
   131  		// insert the entry in to the mem-table
   132  		t.putInternal(e)
   133  	}
   134  	// TODO: possibly think about dealing with partial batches??
   135  	if t.size >= threshold {
   136  		// size is greater or equal to supplied threshold
   137  		// return size along with a true value (need flush)
   138  		return t.size, true
   139  	}
   140  	// size has not met or exceeded supplied threshold
   141  	// simply return the current size, and a false value
   142  	return t.size, false
   143  }
   144  
   145  func (t *rbTree) PutBatch(batch *binary.Batch) {
   146  	for _, entry := range batch.Entries {
   147  		t.putInternal(entry)
   148  	}
   149  }
   150  
   151  func (t *rbTree) putInternal(entry *binary.Entry) (*binary.Entry, bool) {
   152  	if entry == nil {
   153  		return nil, false
   154  	}
   155  	// insert return the node along with
   156  	// a boolean value signaling true if
   157  	// the node was updated, and false if
   158  	// the node was newly added.
   159  	ret, ok := t.insert(&rbNode{
   160  		left:   t.NIL,
   161  		right:  t.NIL,
   162  		parent: t.NIL,
   163  		color:  RED,
   164  		entry:  entry,
   165  	})
   166  	return ret.entry, ok
   167  }
   168  
   169  func (t *rbTree) Get(entry *binary.Entry) (*binary.Entry, bool) {
   170  	return t.getInternal(entry)
   171  }
   172  
   173  // GetNearMin performs an approximate search for the specified key
   174  // and returns the closest key that is less than (the predecessor)
   175  // to the searched key as well as a boolean reporting true if an
   176  // exact match was found for the key, and false if it is unknown
   177  // or and exact match was not found
   178  func (t *rbTree) GetNearMin(entry *binary.Entry) (*binary.Entry, bool) {
   179  	if entry == nil {
   180  		return nil, false
   181  	}
   182  	ret := t.searchApprox(&rbNode{
   183  		left:   t.NIL,
   184  		right:  t.NIL,
   185  		parent: t.NIL,
   186  		color:  RED,
   187  		entry:  entry,
   188  	})
   189  	prev := t.predecessor(ret).entry
   190  	if prev == nil {
   191  		prev, _ = t.Min()
   192  	}
   193  	return prev, compare(ret.entry, entry) == 0
   194  }
   195  
   196  // GetNearMax performs an approximate search for the specified key
   197  // and returns the closest key that is greater than (the successor)
   198  // to the searched key as well as a boolean reporting true if an
   199  // exact match was found for the key, and false if it is unknown or
   200  // and exact match was not found
   201  func (t *rbTree) GetNearMax(entry *binary.Entry) (*binary.Entry, bool) {
   202  	if entry == nil {
   203  		return nil, false
   204  	}
   205  	ret := t.searchApprox(&rbNode{
   206  		left:   t.NIL,
   207  		right:  t.NIL,
   208  		parent: t.NIL,
   209  		color:  RED,
   210  		entry:  entry,
   211  	})
   212  	return t.successor(ret).entry, compare(ret.entry, entry) == 0
   213  }
   214  
   215  // GetApproxPrevNext performs an approximate search for the specified key
   216  // and returns the searched key, the predecessor, and the successor and a
   217  // boolean reporting true if an exact match was found for the key, and false
   218  // if it is unknown or and exact match was not found
   219  func (t *rbTree) GetApproxPrevNext(entry *binary.Entry) (*binary.Entry, *binary.Entry, *binary.Entry, bool) {
   220  	if entry == nil {
   221  		return nil, nil, nil, false
   222  	}
   223  	ret := t.searchApprox(&rbNode{
   224  		left:   t.NIL,
   225  		right:  t.NIL,
   226  		parent: t.NIL,
   227  		color:  RED,
   228  		entry:  entry,
   229  	})
   230  	return ret.entry, t.predecessor(ret).entry, t.successor(ret).entry,
   231  		compare(ret.entry, entry) == 0
   232  }
   233  
   234  func (t *rbTree) getInternal(entry *binary.Entry) (*binary.Entry, bool) {
   235  	if entry == nil {
   236  		return nil, false
   237  	}
   238  	ret := t.search(&rbNode{
   239  		left:   t.NIL,
   240  		right:  t.NIL,
   241  		parent: t.NIL,
   242  		color:  RED,
   243  		entry:  entry,
   244  	})
   245  	return ret.entry, ret.entry != nil
   246  }
   247  
   248  func (t *rbTree) Del(entry *binary.Entry) (*binary.Entry, bool) {
   249  	return t.delInternal(entry)
   250  }
   251  
   252  func (t *rbTree) delInternal(entry *binary.Entry) (*binary.Entry, bool) {
   253  	if entry == nil {
   254  		return nil, false
   255  	}
   256  	cnt := t.count
   257  	ret := t.delete(&rbNode{
   258  		left:   t.NIL,
   259  		right:  t.NIL,
   260  		parent: t.NIL,
   261  		color:  RED,
   262  		entry:  entry,
   263  	})
   264  	return ret.entry, cnt == t.count+1
   265  }
   266  
   267  func (t *rbTree) Len() int {
   268  	return t.count
   269  }
   270  
   271  // Size returns the size in bytes
   272  func (t *rbTree) Size() int64 {
   273  	return t.size
   274  }
   275  
   276  func (t *rbTree) Min() (*binary.Entry, bool) {
   277  	x := t.min(t.root)
   278  	if x == t.NIL {
   279  		return nil, false
   280  	}
   281  	return x.entry, true
   282  }
   283  
   284  func (t *rbTree) Max() (*binary.Entry, bool) {
   285  	x := t.max(t.root)
   286  	if x == t.NIL {
   287  		return nil, false
   288  	}
   289  	return x.entry, true
   290  }
   291  
   292  // helper function for clone
   293  func (t *rbTree) cloneEntries(t2 *rbTree) {
   294  	t.ascend(t.root, t.min(t.root).entry, func(e *binary.Entry) bool {
   295  		t2.putInternal(e)
   296  		return true
   297  	})
   298  }
   299  
   300  type Iterator func(entry *binary.Entry) bool
   301  
   302  func (t *rbTree) Scan(iter Iterator) {
   303  	t.ascend(t.root, t.min(t.root).entry, iter)
   304  }
   305  
   306  func (t *rbTree) ScanBack(iter Iterator) {
   307  	t.descend(t.root, t.max(t.root).entry, iter)
   308  }
   309  
   310  func (t *rbTree) ScanRange(start, end *binary.Entry, iter Iterator) {
   311  	t.ascendRange(t.root, start, end, iter)
   312  }
   313  
   314  func (t *rbTree) String() string {
   315  	var sb strings.Builder
   316  	t.ascend(t.root, t.min(t.root).entry, func(entry *binary.Entry) bool {
   317  		sb.WriteString(entry.String())
   318  		return true
   319  	})
   320  	return sb.String()
   321  }
   322  
   323  func (t *rbTree) Close() {
   324  	t.NIL = nil
   325  	t.root = nil
   326  	t.count = 0
   327  	return
   328  }
   329  
   330  func (t *rbTree) Reset() {
   331  	t.NIL = nil
   332  	t.root = nil
   333  	t.count = 0
   334  	runtime.GC()
   335  	n := &rbNode{
   336  		left:   nil,
   337  		right:  nil,
   338  		parent: nil,
   339  		color:  BLACK,
   340  		entry:  empty,
   341  	}
   342  	t.NIL = n
   343  	t.root = n
   344  	t.count = 0
   345  	t.size = 0
   346  }
   347  
   348  func (t *rbTree) insert(z *rbNode) (*rbNode, bool) {
   349  	x := t.root
   350  	y := t.NIL
   351  	for x != t.NIL {
   352  		y = x
   353  		if compare(z.entry, x.entry) == -1 {
   354  			x = x.left
   355  		} else if compare(x.entry, z.entry) == -1 {
   356  			x = x.right
   357  		} else {
   358  			t.size -= int64(x.entry.Size())
   359  			t.size += int64(z.entry.Size())
   360  			// originally we were just returning x
   361  			// without updating the RBEntry, but if we
   362  			// want it to have similar behavior to
   363  			// a hashmap then we need to update any
   364  			// entries that already exist in the tree
   365  			x.entry = z.entry
   366  			return x, true // true means an existing
   367  			// value was found and updated. It should
   368  			// be noted that we don't need to re-balance
   369  			// the tree because they keys are not changing
   370  			// and the tree is balance is maintained by
   371  			// the keys and not their values.
   372  		}
   373  	}
   374  	z.parent = y
   375  	if y == t.NIL {
   376  		t.root = z
   377  	} else if compare(z.entry, y.entry) == -1 {
   378  		y.left = z
   379  	} else {
   380  		y.right = z
   381  	}
   382  	t.count++
   383  	t.size += int64(z.entry.Size())
   384  	t.insertFixup(z)
   385  	return z, false
   386  }
   387  
   388  func (t *rbTree) leftRotate(x *rbNode) {
   389  	if x.right == t.NIL {
   390  		return
   391  	}
   392  	y := x.right
   393  	x.right = y.left
   394  	if y.left != t.NIL {
   395  		y.left.parent = x
   396  	}
   397  	y.parent = x.parent
   398  	if x.parent == t.NIL {
   399  		t.root = y
   400  	} else if x == x.parent.left {
   401  		x.parent.left = y
   402  	} else {
   403  		x.parent.right = y
   404  	}
   405  	y.left = x
   406  	x.parent = y
   407  }
   408  
   409  func (t *rbTree) rightRotate(x *rbNode) {
   410  	if x.left == t.NIL {
   411  		return
   412  	}
   413  	y := x.left
   414  	x.left = y.right
   415  	if y.right != t.NIL {
   416  		y.right.parent = x
   417  	}
   418  	y.parent = x.parent
   419  
   420  	if x.parent == t.NIL {
   421  		t.root = y
   422  	} else if x == x.parent.left {
   423  		x.parent.left = y
   424  	} else {
   425  		x.parent.right = y
   426  	}
   427  
   428  	y.right = x
   429  	x.parent = y
   430  }
   431  
// insertFixup restores the coloring of the tree after the RED node z
// has been linked in by insert. It loops while z's parent is RED,
// recoloring and rotating as it goes, and finally forces the root to
// BLACK. The two outer branches mirror each other depending on whether
// z's parent is a left or a right child.
func (t *rbTree) insertFixup(z *rbNode) {
	for z.parent.color == RED {
		if z.parent == z.parent.parent.left {
			// y is z's uncle (the grandparent's other child)
			y := z.parent.parent.right
			if y.color == RED {
				// RED uncle: recolor parent, uncle and grandparent,
				// then continue from the grandparent
				z.parent.color = BLACK
				y.color = BLACK
				z.parent.parent.color = RED
				z = z.parent.parent
			} else {
				if z == z.parent.right {
					// z is an inner child: rotate it to the outside first
					z = z.parent
					t.leftRotate(z)
				}
				// z is an outer child: recolor and rotate the grandparent
				z.parent.color = BLACK
				z.parent.parent.color = RED
				t.rightRotate(z.parent.parent)
			}
		} else {
			// mirror image of the branch above
			y := z.parent.parent.left
			if y.color == RED {
				z.parent.color = BLACK
				y.color = BLACK
				z.parent.parent.color = RED
				z = z.parent.parent
			} else {
				if z == z.parent.left {
					z = z.parent
					t.rightRotate(z)
				}
				z.parent.color = BLACK
				z.parent.parent.color = RED
				t.leftRotate(z.parent.parent)
			}
		}
	}
	t.root.color = BLACK
}
   470  
   471  // trying out a slightly different search method
   472  // that (hopefully) will not return nil values and
   473  // instead will return approximate node matches
   474  func (t *rbTree) searchApprox(x *rbNode) *rbNode {
   475  	p := t.root
   476  	for p != t.NIL {
   477  		if compare(p.entry, x.entry) == -1 {
   478  			if p.right == t.NIL {
   479  				break
   480  			}
   481  			p = p.right
   482  		} else if compare(x.entry, p.entry) == -1 {
   483  			if p.left == t.NIL {
   484  				break
   485  			}
   486  			p = p.left
   487  		} else {
   488  			break
   489  		}
   490  	}
   491  	return p
   492  }
   493  
   494  func (t *rbTree) search(x *rbNode) *rbNode {
   495  	p := t.root
   496  	for p != t.NIL {
   497  		if compare(p.entry, x.entry) == -1 {
   498  			p = p.right
   499  		} else if compare(x.entry, p.entry) == -1 {
   500  			p = p.left
   501  		} else {
   502  			break
   503  		}
   504  	}
   505  	return p
   506  }
   507  
   508  // min traverses from root to left recursively until left is NIL
   509  func (t *rbTree) min(x *rbNode) *rbNode {
   510  	if x == t.NIL {
   511  		return t.NIL
   512  	}
   513  	for x.left != t.NIL {
   514  		x = x.left
   515  	}
   516  	return x
   517  }
   518  
   519  // max traverses from root to right recursively until right is NIL
   520  func (t *rbTree) max(x *rbNode) *rbNode {
   521  	if x == t.NIL {
   522  		return t.NIL
   523  	}
   524  	for x.right != t.NIL {
   525  		x = x.right
   526  	}
   527  	return x
   528  }
   529  
   530  func (t *rbTree) predecessor(x *rbNode) *rbNode {
   531  	if x == t.NIL {
   532  		return t.NIL
   533  	}
   534  	if x.left != t.NIL {
   535  		return t.max(x.left)
   536  	}
   537  	y := x.parent
   538  	for y != t.NIL && x == y.left {
   539  		x = y
   540  		y = y.parent
   541  	}
   542  	return y
   543  }
   544  
   545  func (t *rbTree) successor(x *rbNode) *rbNode {
   546  	if x == t.NIL {
   547  		return t.NIL
   548  	}
   549  	if x.right != t.NIL {
   550  		return t.min(x.right)
   551  	}
   552  	y := x.parent
   553  	for y != t.NIL && x == y.right {
   554  		x = y
   555  		y = y.parent
   556  	}
   557  	return y
   558  }
   559  
// delete removes the node whose key equals key's key. If no such node
// exists it returns the sentinel t.NIL and changes nothing. Otherwise
// it unlinks one node, re-balances if needed, decrements the count and
// the tracked size, and returns a detached copy holding the removed
// node's original color and entry.
func (t *rbTree) delete(key *rbNode) *rbNode {
	z := t.search(key)
	if z == t.NIL {
		return t.NIL
	}
	// snapshot of what is being removed; z.entry may be overwritten below
	ret := &rbNode{t.NIL, t.NIL, t.NIL, z.color, z.entry}
	// y is the node that is physically unlinked, x is the child that
	// takes y's place (possibly the sentinel)
	var y *rbNode
	var x *rbNode
	if z.left == t.NIL || z.right == t.NIL {
		y = z
	} else {
		y = t.successor(z)
	}
	if y.left != t.NIL {
		x = y.left
	} else {
		x = y.right
	}
	// set unconditionally, even when x is the sentinel: deleteFixup
	// navigates through x.parent
	x.parent = y.parent

	if y.parent == t.NIL {
		t.root = x
	} else if y == y.parent.left {
		y.parent.left = x
	} else {
		y.parent.right = x
	}
	if y != z {
		// the successor was unlinked instead of z, so move its entry into z
		z.entry = y.entry
	}
	if y.color == BLACK {
		t.deleteFixup(x)
	}
	t.size -= int64(ret.entry.Size())
	t.count--
	return ret
}
   597  
// deleteFixup restores the coloring of the tree after delete has
// unlinked a BLACK node, starting from x, the node that took its
// place. It loops while x is not the root and is BLACK, recoloring and
// rotating around x's sibling w, and finally colors x BLACK. The two
// outer branches mirror each other depending on whether x is a left or
// a right child.
func (t *rbTree) deleteFixup(x *rbNode) {
	for x != t.root && x.color == BLACK {
		if x == x.parent.left {
			// w is x's sibling
			w := x.parent.right
			if w.color == RED {
				// RED sibling: recolor and rotate so x gets a BLACK sibling
				w.color = BLACK
				x.parent.color = RED
				t.leftRotate(x.parent)
				w = x.parent.right
			}
			if w.left.color == BLACK && w.right.color == BLACK {
				// both of w's children are BLACK: recolor w and move up
				w.color = RED
				x = x.parent
			} else {
				if w.right.color == BLACK {
					// w's far child is BLACK: rotate w so the far child is RED
					w.left.color = BLACK
					w.color = RED
					t.rightRotate(w)
					w = x.parent.right
				}
				// w's far child is RED: recolor, rotate the parent, and finish
				w.color = x.parent.color
				x.parent.color = BLACK
				w.right.color = BLACK
				t.leftRotate(x.parent)
				// this is to exit while loop
				x = t.root
			}
		} else {
			// mirror image of the branch above
			w := x.parent.left
			if w.color == RED {
				w.color = BLACK
				x.parent.color = RED
				t.rightRotate(x.parent)
				w = x.parent.left
			}
			if w.left.color == BLACK && w.right.color == BLACK {
				w.color = RED
				x = x.parent
			} else {
				if w.left.color == BLACK {
					w.right.color = BLACK
					w.color = RED
					t.leftRotate(w)
					w = x.parent.left
				}
				w.color = x.parent.color
				x.parent.color = BLACK
				w.left.color = BLACK
				t.rightRotate(x.parent)
				// this is to exit the loop
				x = t.root
			}
		}
	}
	x.color = BLACK
}
   653  
   654  func (t *rbTree) ascend(x *rbNode, entry *binary.Entry, iter Iterator) bool {
   655  	if x == t.NIL {
   656  		return true
   657  	}
   658  	if !(compare(x.entry, entry) == -1) {
   659  		if !t.ascend(x.left, entry, iter) {
   660  			return false
   661  		}
   662  		if !iter(x.entry) {
   663  			return false
   664  		}
   665  	}
   666  	return t.ascend(x.right, entry, iter)
   667  }
   668  
   669  func (t *rbTree) descend(x *rbNode, pivot *binary.Entry, iter Iterator) bool {
   670  	if x == t.NIL {
   671  		return true
   672  	}
   673  	if !(compare(pivot, x.entry) == -1) {
   674  		if !t.descend(x.right, pivot, iter) {
   675  			return false
   676  		}
   677  		if !iter(x.entry) {
   678  			return false
   679  		}
   680  	}
   681  	return t.descend(x.left, pivot, iter)
   682  }
   683  
   684  func (t *rbTree) ascendRange(x *rbNode, inf, sup *binary.Entry, iter Iterator) bool {
   685  	if x == t.NIL {
   686  		return true
   687  	}
   688  	if !(compare(x.entry, sup) == -1) {
   689  		return t.ascendRange(x.left, inf, sup, iter)
   690  	}
   691  	if compare(x.entry, inf) == -1 {
   692  		return t.ascendRange(x.right, inf, sup, iter)
   693  	}
   694  	if !t.ascendRange(x.left, inf, sup, iter) {
   695  		return false
   696  	}
   697  	if !iter(x.entry) {
   698  		return false
   699  	}
   700  	return t.ascendRange(x.right, inf, sup, iter)
   701  }