github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/unionstore/memdb.go (about)

     1  // Copyright 2021 TiKV Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // NOTE: The code in this file is based on code from the
    16  // TiDB project, licensed under the Apache License v 2.0
    17  //
    18  // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/unionstore/memdb.go
    19  //
    20  
    21  // Copyright 2020 PingCAP, Inc.
    22  //
    23  // Licensed under the Apache License, Version 2.0 (the "License");
    24  // you may not use this file except in compliance with the License.
    25  // You may obtain a copy of the License at
    26  //
    27  //     http://www.apache.org/licenses/LICENSE-2.0
    28  //
    29  // Unless required by applicable law or agreed to in writing, software
    30  // distributed under the License is distributed on an "AS IS" BASIS,
    31  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    32  // See the License for the specific language governing permissions and
    33  // limitations under the License.
    34  
    35  package unionstore
    36  
    37  import (
    38  	"bytes"
    39  	"math"
    40  	"reflect"
    41  	"sync"
    42  	"unsafe"
    43  
    44  	tikverr "github.com/KinWaiYuen/client-go/v2/error"
    45  	"github.com/KinWaiYuen/client-go/v2/kv"
    46  )
    47  
    48  var tombstone = []byte{}
    49  
    50  // IsTombstone returns whether the value is a tombstone.
    51  func IsTombstone(val []byte) bool { return len(val) == 0 }
    52  
// MemKeyHandle represents a pointer for key in MemBuffer.
// The idx/off pair identifies a node inside the arena; toAddr widens it into
// a memdbArenaAddr.
type MemKeyHandle struct {
	// Opaque user data
	UserData uint16
	// idx becomes memdbArenaAddr.idx (widened to uint32 by toAddr).
	idx uint16
	// off becomes memdbArenaAddr.off.
	off uint32
}
    60  
    61  func (h MemKeyHandle) toAddr() memdbArenaAddr {
    62  	return memdbArenaAddr{idx: uint32(h.idx), off: h.off}
    63  }
    64  
// MemDB is a rollbackable Red-Black Tree optimized for TiDB's transaction states buffer use scenario.
// You can think of MemDB as a combination of two separate tree maps, one for key => value and another for key => keyFlags.
//
// The value map is rollbackable, which means you can use the `Staging`, `Release` and `Cleanup` APIs to safely modify KVs.
//
// The flags map is not rollbackable. There are two types of flag, persistent and non-persistent.
// When discarding a newly added KV in `Cleanup`, the non-persistent flags will be cleared.
// If there are persistent flags associated with key, we will keep this key in node without value.
type MemDB struct {
	// This RWMutex is only used to ensure memdbSnapGetter.Get will not race with
	// concurrent memdb.Set, memdb.SetWithFlags, memdb.Delete and memdb.UpdateFlags.
	sync.RWMutex
	// root is the arena address of the tree root; nullAddr when the tree is empty.
	root memdbArenaAddr
	// allocator owns the storage for tree nodes.
	allocator nodeAllocator
	// vlog stores the values; nodes point into it through their vptr.
	vlog memdbVlog

	// entrySizeLimit caps len(key)+len(value) of a single write (checked in set).
	entrySizeLimit uint64
	// bufferSizeLimit caps Size() after a write (checked in set).
	bufferSizeLimit uint64
	// count is the number of nodes; returned by Len.
	count int
	// size is the running sum of key and value lengths; returned by Size.
	size int

	// vlogInvalid is set by DiscardValues; value reads and writes panic afterwards.
	vlogInvalid bool
	// dirty is returned by Dirty.
	dirty bool
	// stages holds one vlog checkpoint per active staging buffer, oldest first.
	stages []memdbCheckpoint
}
    90  
    91  func newMemDB() *MemDB {
    92  	db := new(MemDB)
    93  	db.allocator.init()
    94  	db.root = nullAddr
    95  	db.stages = make([]memdbCheckpoint, 0, 2)
    96  	db.entrySizeLimit = math.MaxUint64
    97  	db.bufferSizeLimit = math.MaxUint64
    98  	return db
    99  }
   100  
   101  // Staging create a new staging buffer inside the MemBuffer.
   102  // Subsequent writes will be temporarily stored in this new staging buffer.
   103  // When you think all modifications looks good, you can call `Release` to public all of them to the upper level buffer.
   104  func (db *MemDB) Staging() int {
   105  	db.Lock()
   106  	defer db.Unlock()
   107  
   108  	db.stages = append(db.stages, db.vlog.checkpoint())
   109  	return len(db.stages)
   110  }
   111  
   112  // Release publish all modifications in the latest staging buffer to upper level.
   113  func (db *MemDB) Release(h int) {
   114  	if h != len(db.stages) {
   115  		// This should never happens in production environment.
   116  		// Use panic to make debug easier.
   117  		panic("cannot release staging buffer")
   118  	}
   119  
   120  	db.Lock()
   121  	defer db.Unlock()
   122  	if h == 1 {
   123  		tail := db.vlog.checkpoint()
   124  		if !db.stages[0].isSamePosition(&tail) {
   125  			db.dirty = true
   126  		}
   127  	}
   128  	db.stages = db.stages[:h-1]
   129  }
   130  
   131  // Cleanup cleanup the resources referenced by the StagingHandle.
   132  // If the changes are not published by `Release`, they will be discarded.
   133  func (db *MemDB) Cleanup(h int) {
   134  	if h > len(db.stages) {
   135  		return
   136  	}
   137  	if h < len(db.stages) {
   138  		// This should never happens in production environment.
   139  		// Use panic to make debug easier.
   140  		panic("cannot cleanup staging buffer")
   141  	}
   142  
   143  	db.Lock()
   144  	defer db.Unlock()
   145  	cp := &db.stages[h-1]
   146  	if !db.vlogInvalid {
   147  		curr := db.vlog.checkpoint()
   148  		if !curr.isSamePosition(cp) {
   149  			db.vlog.revertToCheckpoint(db, cp)
   150  			db.vlog.truncate(cp)
   151  		}
   152  	}
   153  	db.stages = db.stages[:h-1]
   154  }
   155  
   156  // Reset resets the MemBuffer to initial states.
   157  func (db *MemDB) Reset() {
   158  	db.root = nullAddr
   159  	db.stages = db.stages[:0]
   160  	db.dirty = false
   161  	db.vlogInvalid = false
   162  	db.size = 0
   163  	db.count = 0
   164  	db.vlog.reset()
   165  	db.allocator.reset()
   166  }
   167  
   168  // DiscardValues releases the memory used by all values.
   169  // NOTE: any operation need value will panic after this function.
   170  func (db *MemDB) DiscardValues() {
   171  	db.vlogInvalid = true
   172  	db.vlog.reset()
   173  }
   174  
   175  // InspectStage used to inspect the value updates in the given stage.
   176  func (db *MemDB) InspectStage(handle int, f func([]byte, kv.KeyFlags, []byte)) {
   177  	idx := handle - 1
   178  	tail := db.vlog.checkpoint()
   179  	head := db.stages[idx]
   180  	db.vlog.inspectKVInLog(db, &head, &tail, f)
   181  }
   182  
   183  // Get gets the value for key k from kv store.
   184  // If corresponding kv pair does not exist, it returns nil and ErrNotExist.
   185  func (db *MemDB) Get(key []byte) ([]byte, error) {
   186  	if db.vlogInvalid {
   187  		// panic for easier debugging.
   188  		panic("vlog is resetted")
   189  	}
   190  
   191  	x := db.traverse(key, false)
   192  	if x.isNull() {
   193  		return nil, tikverr.ErrNotExist
   194  	}
   195  	if x.vptr.isNull() {
   196  		// A flag only key, act as value not exists
   197  		return nil, tikverr.ErrNotExist
   198  	}
   199  	return db.vlog.getValue(x.vptr), nil
   200  }
   201  
   202  // SelectValueHistory select the latest value which makes `predicate` returns true from the modification history.
   203  func (db *MemDB) SelectValueHistory(key []byte, predicate func(value []byte) bool) ([]byte, error) {
   204  	x := db.traverse(key, false)
   205  	if x.isNull() {
   206  		return nil, tikverr.ErrNotExist
   207  	}
   208  	if x.vptr.isNull() {
   209  		// A flag only key, act as value not exists
   210  		return nil, tikverr.ErrNotExist
   211  	}
   212  	result := db.vlog.selectValueHistory(x.vptr, func(addr memdbArenaAddr) bool {
   213  		return predicate(db.vlog.getValue(addr))
   214  	})
   215  	if result.isNull() {
   216  		return nil, nil
   217  	}
   218  	return db.vlog.getValue(result), nil
   219  }
   220  
   221  // GetFlags returns the latest flags associated with key.
   222  func (db *MemDB) GetFlags(key []byte) (kv.KeyFlags, error) {
   223  	x := db.traverse(key, false)
   224  	if x.isNull() {
   225  		return 0, tikverr.ErrNotExist
   226  	}
   227  	return x.getKeyFlags(), nil
   228  }
   229  
   230  // UpdateFlags update the flags associated with key.
   231  func (db *MemDB) UpdateFlags(key []byte, ops ...kv.FlagsOp) {
   232  	err := db.set(key, nil, ops...)
   233  	_ = err // set without value will never fail
   234  }
   235  
   236  // Set sets the value for key k as v into kv store.
   237  // v must NOT be nil or empty, otherwise it returns ErrCannotSetNilValue.
   238  func (db *MemDB) Set(key []byte, value []byte) error {
   239  	if len(value) == 0 {
   240  		return tikverr.ErrCannotSetNilValue
   241  	}
   242  	return db.set(key, value)
   243  }
   244  
   245  // SetWithFlags put key-value into the last active staging buffer with the given KeyFlags.
   246  func (db *MemDB) SetWithFlags(key []byte, value []byte, ops ...kv.FlagsOp) error {
   247  	if len(value) == 0 {
   248  		return tikverr.ErrCannotSetNilValue
   249  	}
   250  	return db.set(key, value, ops...)
   251  }
   252  
   253  // Delete removes the entry for key k from kv store.
   254  func (db *MemDB) Delete(key []byte) error {
   255  	return db.set(key, tombstone)
   256  }
   257  
   258  // DeleteWithFlags delete key with the given KeyFlags
   259  func (db *MemDB) DeleteWithFlags(key []byte, ops ...kv.FlagsOp) error {
   260  	return db.set(key, tombstone, ops...)
   261  }
   262  
   263  // GetKeyByHandle returns key by handle.
   264  func (db *MemDB) GetKeyByHandle(handle MemKeyHandle) []byte {
   265  	x := db.getNode(handle.toAddr())
   266  	return x.getKey()
   267  }
   268  
   269  // GetValueByHandle returns value by handle.
   270  func (db *MemDB) GetValueByHandle(handle MemKeyHandle) ([]byte, bool) {
   271  	if db.vlogInvalid {
   272  		return nil, false
   273  	}
   274  	x := db.getNode(handle.toAddr())
   275  	if x.vptr.isNull() {
   276  		return nil, false
   277  	}
   278  	return db.vlog.getValue(x.vptr), true
   279  }
   280  
   281  // Len returns the number of entries in the DB.
   282  func (db *MemDB) Len() int {
   283  	return db.count
   284  }
   285  
   286  // Size returns sum of keys and values length.
   287  func (db *MemDB) Size() int {
   288  	return db.size
   289  }
   290  
   291  // Dirty returns whether the root staging buffer is updated.
   292  func (db *MemDB) Dirty() bool {
   293  	return db.dirty
   294  }
   295  
   296  func (db *MemDB) set(key []byte, value []byte, ops ...kv.FlagsOp) error {
   297  	if db.vlogInvalid {
   298  		// panic for easier debugging.
   299  		panic("vlog is resetted")
   300  	}
   301  
   302  	if value != nil {
   303  		if size := uint64(len(key) + len(value)); size > db.entrySizeLimit {
   304  			return &tikverr.ErrEntryTooLarge{
   305  				Limit: db.entrySizeLimit,
   306  				Size:  size,
   307  			}
   308  		}
   309  	}
   310  
   311  	db.Lock()
   312  	defer db.Unlock()
   313  
   314  	if len(db.stages) == 0 {
   315  		db.dirty = true
   316  	}
   317  	x := db.traverse(key, true)
   318  
   319  	if len(ops) != 0 {
   320  		flags := kv.ApplyFlagsOps(x.getKeyFlags(), ops...)
   321  		if flags.AndPersistent() != 0 {
   322  			db.dirty = true
   323  		}
   324  		x.setKeyFlags(flags)
   325  	}
   326  
   327  	if value == nil {
   328  		return nil
   329  	}
   330  
   331  	db.setValue(x, value)
   332  	if uint64(db.Size()) > db.bufferSizeLimit {
   333  		return &tikverr.ErrTxnTooLarge{Size: db.Size()}
   334  	}
   335  	return nil
   336  }
   337  
   338  func (db *MemDB) setValue(x memdbNodeAddr, value []byte) {
   339  	var activeCp *memdbCheckpoint
   340  	if len(db.stages) > 0 {
   341  		activeCp = &db.stages[len(db.stages)-1]
   342  	}
   343  
   344  	var oldVal []byte
   345  	if !x.vptr.isNull() {
   346  		oldVal = db.vlog.getValue(x.vptr)
   347  	}
   348  
   349  	if len(oldVal) > 0 && db.vlog.canModify(activeCp, x.vptr) {
   350  		// For easier to implement, we only consider this case.
   351  		// It is the most common usage in TiDB's transaction buffers.
   352  		if len(oldVal) == len(value) {
   353  			copy(oldVal, value)
   354  			return
   355  		}
   356  	}
   357  	x.vptr = db.vlog.appendValue(x.addr, x.vptr, value)
   358  	db.size = db.size - len(oldVal) + len(value)
   359  }
   360  
// traverse searches the tree for key. If the key is not found and insert is
// true, a new node is allocated for it, linked in where the search ended, and
// the tree is rebalanced.
//
// It returns the node found, the newly inserted node, or the null node when
// the key is absent and insert is false.
func (db *MemDB) traverse(key []byte, insert bool) memdbNodeAddr {
	x := db.getRoot()
	// y trails x by one step: it ends up as the would-be parent of a new node.
	y := memdbNodeAddr{nil, nullAddr}
	found := false

	// walk x down the tree
	for !x.isNull() && !found {
		y = x
		cmp := bytes.Compare(key, x.getKey())
		if cmp < 0 {
			x = x.getLeft(db)
		} else if cmp > 0 {
			x = x.getRight(db)
		} else {
			found = true
		}
	}

	// Either x is the matching node, or x is null and nothing may be inserted.
	if found || !insert {
		return x
	}

	z := db.allocNode(key)
	z.up = y.addr

	if y.isNull() {
		// The tree was empty: the new node becomes the root.
		db.root = z.addr
	} else {
		cmp := bytes.Compare(z.getKey(), y.getKey())
		if cmp < 0 {
			y.left = z.addr
		} else {
			y.right = z.addr
		}
	}

	z.left = nullAddr
	z.right = nullAddr

	// colour this new node red
	z.setRed()

	// Having added a red node, we must now walk back up the tree balancing it,
	// by a series of rotations and changing of colours
	x = z

	// While we are not at the top and our parent node is red
	// NOTE: Since the root node is guaranteed black, then we
	// are also going to stop if we are the child of the root

	for x.addr != db.root {
		xUp := x.getUp(db)
		if xUp.isBlack() {
			break
		}

		xUpUp := xUp.getUp(db)
		// if our parent is on the left side of our grandparent
		if x.up == xUpUp.left {
			// get the right side of our grandparent (uncle?)
			y = xUpUp.getRight(db)
			if y.isRed() {
				// make our parent black
				xUp.setBlack()
				// make our uncle black
				y.setBlack()
				// make our grandparent red
				xUpUp.setRed()
				// now consider our grandparent
				x = xUp.getUp(db)
			} else {
				// if we are on the right side of our parent
				if x.addr == xUp.right {
					// Move up to our parent
					x = x.getUp(db)
					db.leftRotate(x)
					// The rotation changed the links above x; reload them.
					xUp = x.getUp(db)
					xUpUp = xUp.getUp(db)
				}

				xUp.setBlack()
				xUpUp.setRed()
				db.rightRotate(xUpUp)
			}
		} else {
			// everything here is the same as above, but exchanging left for right
			y = xUpUp.getLeft(db)
			if y.isRed() {
				xUp.setBlack()
				y.setBlack()
				xUpUp.setRed()

				x = xUp.getUp(db)
			} else {
				if x.addr == xUp.left {
					x = x.getUp(db)
					db.rightRotate(x)
					// The rotation changed the links above x; reload them.
					xUp = x.getUp(db)
					xUpUp = xUp.getUp(db)
				}

				xUp.setBlack()
				xUpUp.setRed()
				db.leftRotate(xUpUp)
			}
		}
	}

	// Set the root node black
	db.getRoot().setBlack()

	return z
}
   476  
   477  //
   478  // Rotate our tree thus:-
   479  //
   480  //             X        leftRotate(X)--->           Y
   481  //           /   \                                /   \
   482  //          A     Y     <---rightRotate(Y)       X     C
   483  //              /   \                          /   \
   484  //             B     C                        A     B
   485  //
   486  // NOTE: This does not change the ordering.
   487  //
   488  // We assume that neither X nor Y is NULL
   489  //
   490  
   491  func (db *MemDB) leftRotate(x memdbNodeAddr) {
   492  	y := x.getRight(db)
   493  
   494  	// Turn Y's left subtree into X's right subtree (move B)
   495  	x.right = y.left
   496  
   497  	// If B is not null, set it's parent to be X
   498  	if !y.left.isNull() {
   499  		left := y.getLeft(db)
   500  		left.up = x.addr
   501  	}
   502  
   503  	// Set Y's parent to be what X's parent was
   504  	y.up = x.up
   505  
   506  	// if X was the root
   507  	if x.up.isNull() {
   508  		db.root = y.addr
   509  	} else {
   510  		xUp := x.getUp(db)
   511  		// Set X's parent's left or right pointer to be Y
   512  		if x.addr == xUp.left {
   513  			xUp.left = y.addr
   514  		} else {
   515  			xUp.right = y.addr
   516  		}
   517  	}
   518  
   519  	// Put X on Y's left
   520  	y.left = x.addr
   521  	// Set X's parent to be Y
   522  	x.up = y.addr
   523  }
   524  
   525  func (db *MemDB) rightRotate(y memdbNodeAddr) {
   526  	x := y.getLeft(db)
   527  
   528  	// Turn X's right subtree into Y's left subtree (move B)
   529  	y.left = x.right
   530  
   531  	// If B is not null, set it's parent to be Y
   532  	if !x.right.isNull() {
   533  		right := x.getRight(db)
   534  		right.up = y.addr
   535  	}
   536  
   537  	// Set X's parent to be what Y's parent was
   538  	x.up = y.up
   539  
   540  	// if Y was the root
   541  	if y.up.isNull() {
   542  		db.root = x.addr
   543  	} else {
   544  		yUp := y.getUp(db)
   545  		// Set Y's parent's left or right pointer to be X
   546  		if y.addr == yUp.left {
   547  			yUp.left = x.addr
   548  		} else {
   549  			yUp.right = x.addr
   550  		}
   551  	}
   552  
   553  	// Put Y on X's right
   554  	x.right = y.addr
   555  	// Set Y's parent to be X
   556  	y.up = x.addr
   557  }
   558  
// deleteNode unlinks node z from the tree, updates count and size, restores
// the red-black properties if needed, and returns z's storage to the
// allocator.
func (db *MemDB) deleteNode(z memdbNodeAddr) {
	var x, y memdbNodeAddr

	db.count--
	db.size -= int(z.klen)

	// y is the node that is physically spliced out: z itself when it has at
	// most one child, otherwise z's in-order successor.
	if z.left.isNull() || z.right.isNull() {
		y = z
	} else {
		y = db.successor(z)
	}

	// x is y's only child (it may be the null node) and takes y's position.
	if !y.left.isNull() {
		x = y.getLeft(db)
	} else {
		x = y.getRight(db)
	}
	// NOTE(review): x may be the null node here, so this write relies on
	// getNode handing back a writable node for nullAddr — confirm in the
	// allocator. deleteNodeFix later reads x's parent through this link.
	x.up = y.up

	if y.up.isNull() {
		db.root = x.addr
	} else {
		yUp := y.getUp(db)
		if y.addr == yUp.left {
			yUp.left = x.addr
		} else {
			yUp.right = x.addr
		}
	}

	// Capture y's colour now: replaceNode below overwrites it with z's colour.
	needFix := y.isBlack()

	// NOTE: traditional red-black tree will copy key from Y to Z and free Y.
	// We cannot do the same thing here, due to Y's pointer is stored in vlog and the space in Z may not suitable for Y.
	// So we need to copy states from Z to Y, and relink all nodes formerly connected to Z.
	if y != z {
		db.replaceNode(z, y)
	}

	if needFix {
		db.deleteNodeFix(x)
	}

	db.allocator.freeNode(z.addr)
}
   604  
// replaceNode puts node new into the tree position currently held by node
// old: old's parent (or db.root) is pointed at new, both of old's children
// are re-parented to new, and new takes over old's links and colour.
// Node old itself is left untouched.
func (db *MemDB) replaceNode(old memdbNodeAddr, new memdbNodeAddr) {
	if !old.up.isNull() {
		oldUp := old.getUp(db)
		if old.addr == oldUp.left {
			oldUp.left = new.addr
		} else {
			oldUp.right = new.addr
		}
	} else {
		// old was the root.
		db.root = new.addr
	}
	new.up = old.up

	// NOTE(review): the children are re-parented unconditionally, so when a
	// child link is null this writes the `up` field of whatever node getNode
	// returns for nullAddr — confirm that this is intended.
	left := old.getLeft(db)
	left.up = new.addr
	new.left = old.left

	right := old.getRight(db)
	right.up = new.addr
	new.right = old.right

	// Copy the colour bit only; new keeps its own key flags.
	if old.isBlack() {
		new.setBlack()
	} else {
		new.setRed()
	}
}
   632  
// deleteNodeFix rebalances the tree after deleteNode spliced out a black
// node. x is the node that took the removed node's position. The loop walks
// up from x, recolouring and rotating around x's sibling w, until x is the
// root or a red node; x is then coloured black.
func (db *MemDB) deleteNodeFix(x memdbNodeAddr) {
	for x.addr != db.root && x.isBlack() {
		xUp := x.getUp(db)
		if x.addr == xUp.left {
			// x is a left child; w is its sibling.
			w := xUp.getRight(db)
			if w.isRed() {
				// Red sibling: recolour and rotate so that x gets a black sibling.
				w.setBlack()
				xUp.setRed()
				db.leftRotate(xUp)
				w = x.getUp(db).getRight(db)
			}

			if w.getLeft(db).isBlack() && w.getRight(db).isBlack() {
				// Both of w's children are black: colour w red and continue from the parent.
				w.setRed()
				x = x.getUp(db)
			} else {
				if w.getRight(db).isBlack() {
					// Only w's left child is red: rotate it into the right position.
					w.getLeft(db).setBlack()
					w.setRed()
					db.rightRotate(w)
					w = x.getUp(db).getRight(db)
				}

				// w's right child is red: w takes the parent's colour, the parent
				// and w's right child turn black, and a left rotation finishes the fix.
				xUp := x.getUp(db)
				if xUp.isBlack() {
					w.setBlack()
				} else {
					w.setRed()
				}
				xUp.setBlack()
				w.getRight(db).setBlack()
				db.leftRotate(xUp)
				// Setting x to the root terminates the loop.
				x = db.getRoot()
			}
		} else {
			// Mirror image of the branch above with left and right exchanged.
			w := xUp.getLeft(db)
			if w.isRed() {
				w.setBlack()
				xUp.setRed()
				db.rightRotate(xUp)
				w = x.getUp(db).getLeft(db)
			}

			if w.getRight(db).isBlack() && w.getLeft(db).isBlack() {
				w.setRed()
				x = x.getUp(db)
			} else {
				if w.getLeft(db).isBlack() {
					w.getRight(db).setBlack()
					w.setRed()
					db.leftRotate(w)
					w = x.getUp(db).getLeft(db)
				}

				xUp := x.getUp(db)
				if xUp.isBlack() {
					w.setBlack()
				} else {
					w.setRed()
				}
				xUp.setBlack()
				w.getLeft(db).setBlack()
				db.rightRotate(xUp)
				// Setting x to the root terminates the loop.
				x = db.getRoot()
			}
		}
	}
	x.setBlack()
}
   702  
   703  func (db *MemDB) successor(x memdbNodeAddr) (y memdbNodeAddr) {
   704  	if !x.right.isNull() {
   705  		// If right is not NULL then go right one and
   706  		// then keep going left until we find a node with
   707  		// no left pointer.
   708  
   709  		y = x.getRight(db)
   710  		for !y.left.isNull() {
   711  			y = y.getLeft(db)
   712  		}
   713  		return
   714  	}
   715  
   716  	// Go up the tree until we get to a node that is on the
   717  	// left of its parent (or the root) and then return the
   718  	// parent.
   719  
   720  	y = x.getUp(db)
   721  	for !y.isNull() && x.addr == y.right {
   722  		x = y
   723  		y = y.getUp(db)
   724  	}
   725  	return y
   726  }
   727  
   728  func (db *MemDB) predecessor(x memdbNodeAddr) (y memdbNodeAddr) {
   729  	if !x.left.isNull() {
   730  		// If left is not NULL then go left one and
   731  		// then keep going right until we find a node with
   732  		// no right pointer.
   733  
   734  		y = x.getLeft(db)
   735  		for !y.right.isNull() {
   736  			y = y.getRight(db)
   737  		}
   738  		return
   739  	}
   740  
   741  	// Go up the tree until we get to a node that is on the
   742  	// right of its parent (or the root) and then return the
   743  	// parent.
   744  
   745  	y = x.getUp(db)
   746  	for !y.isNull() && x.addr == y.left {
   747  		x = y
   748  		y = y.getUp(db)
   749  	}
   750  	return y
   751  }
   752  
   753  func (db *MemDB) getNode(x memdbArenaAddr) memdbNodeAddr {
   754  	return memdbNodeAddr{db.allocator.getNode(x), x}
   755  }
   756  
   757  func (db *MemDB) getRoot() memdbNodeAddr {
   758  	return db.getNode(db.root)
   759  }
   760  
   761  func (db *MemDB) allocNode(key []byte) memdbNodeAddr {
   762  	db.size += len(key)
   763  	db.count++
   764  	x, xn := db.allocator.allocNode(key)
   765  	return memdbNodeAddr{xn, x}
   766  }
   767  
// memdbNodeAddr pairs a node pointer with the arena address it was resolved
// from. Because the node is embedded as a pointer, copies of a memdbNodeAddr
// refer to the same node, and writes through any copy affect that node.
type memdbNodeAddr struct {
	*memdbNode
	// addr is the arena address of the embedded node.
	addr memdbArenaAddr
}
   772  
   773  func (a *memdbNodeAddr) isNull() bool {
   774  	return a.addr.isNull()
   775  }
   776  
   777  func (a memdbNodeAddr) getUp(db *MemDB) memdbNodeAddr {
   778  	return db.getNode(a.up)
   779  }
   780  
   781  func (a memdbNodeAddr) getLeft(db *MemDB) memdbNodeAddr {
   782  	return db.getNode(a.left)
   783  }
   784  
   785  func (a memdbNodeAddr) getRight(db *MemDB) memdbNodeAddr {
   786  	return db.getNode(a.right)
   787  }
   788  
// memdbNode is the header of a tree node.
// NOTE: getKey locates the key bytes by pointer arithmetic relative to the
// flags field, so the order and size of these fields must not change.
type memdbNode struct {
	// up, left and right are the arena addresses of the parent and children.
	up    memdbArenaAddr
	left  memdbArenaAddr
	right memdbArenaAddr
	// vptr points at the node's value in the vlog; null for a flag-only key.
	vptr memdbArenaAddr
	// klen is the length of the key in bytes.
	klen uint16
	// flags packs the red/black colour (nodeColorBit) with the key flags
	// (nodeFlagsMask).
	flags uint16
}
   797  
   798  func (n *memdbNode) isRed() bool {
   799  	return n.flags&nodeColorBit != 0
   800  }
   801  
   802  func (n *memdbNode) isBlack() bool {
   803  	return !n.isRed()
   804  }
   805  
   806  func (n *memdbNode) setRed() {
   807  	n.flags |= nodeColorBit
   808  }
   809  
   810  func (n *memdbNode) setBlack() {
   811  	n.flags &= ^nodeColorBit
   812  }
   813  
// getKey returns the node's key as a byte slice of length and capacity klen.
// The slice is not a copy: it is built by hand to point at the memory that
// starts kv.FlagBytes bytes after the address of n.flags.
// NOTE(review): this presumes the allocator stores the key bytes directly
// behind the node header — confirm against nodeAllocator.allocNode.
func (n *memdbNode) getKey() []byte {
	var ret []byte
	// Fill in the slice header in place so that no allocation or copy happens.
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&ret))
	hdr.Data = uintptr(unsafe.Pointer(&n.flags)) + kv.FlagBytes
	hdr.Len = int(n.klen)
	hdr.Cap = int(n.klen)
	return ret
}
   822  
   823  const (
   824  	// bit 1 => red, bit 0 => black
   825  	nodeColorBit  uint16 = 0x8000
   826  	nodeFlagsMask        = ^nodeColorBit
   827  )
   828  
   829  func (n *memdbNode) getKeyFlags() kv.KeyFlags {
   830  	return kv.KeyFlags(n.flags & nodeFlagsMask)
   831  }
   832  
   833  func (n *memdbNode) setKeyFlags(f kv.KeyFlags) {
   834  	n.flags = (^nodeFlagsMask & n.flags) | uint16(f)
   835  }