github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/arenaskl/skl.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package arenaskl
    16  
    17  import (
    18  	"encoding/binary"
    19  	"math"
    20  	"runtime"
    21  	"sync/atomic"
    22  	"unsafe"
    23  
    24  	"github.com/zuoyebang/bitalosdb/internal/base"
    25  	"github.com/zuoyebang/bitalosdb/internal/fastrand"
    26  
    27  	"github.com/cockroachdb/errors"
    28  )
    29  
const (
	// maxHeight bounds the tower height of any node in the skiplist.
	maxHeight   = 20
	// maxNodeSize is the arena footprint of a node with a full-height tower.
	maxNodeSize = int(unsafe.Sizeof(node{}))
	// linksSize is the size of one tower level (a prev/next offset pair).
	linksSize   = int(unsafe.Sizeof(links{}))
	// pValue is the per-level promotion probability used by randomHeight (1/e).
	pValue      = 1 / math.E
)

// ErrRecordExists is returned by Add/addInternal when a record with the same
// internal key (user key and trailer) is already present in the list.
var ErrRecordExists = errors.New("record with this key already exists")
    38  
// Skiplist is a lock-free skiplist whose nodes, keys and values live in an
// Arena. It supports concurrent inserts and reads. Records are ordered by
// user key ascending (via cmp), then trailer descending (see
// findSpliceForLevel).
type Skiplist struct {
	arena   *Arena       // backing storage for all nodes, keys and values
	cmp     base.Compare // user-key comparison function
	head    *node        // sentinel node before the first real node
	tail    *node        // sentinel node after the last real node
	height  uint32       // current list height; read/written atomically
	testing bool         // when true, insert paths call Gosched to widen race windows
}
    47  
// Inserter caches the splice (per-level prev/next pair) computed by a prior
// insert so that subsequent nearby inserts can reuse it instead of searching
// from the top of the list (see findSplice).
type Inserter struct {
	spl    [maxHeight]splice // cached bracketing pair for each level
	height uint32            // list height at the time the splice was computed; 0 invalidates it
}
    52  
// Add inserts key/value into list, reusing this Inserter's cached splice to
// speed up sequential inserts. It returns ErrRecordExists if the exact
// internal key is already present.
func (ins *Inserter) Add(list *Skiplist, key base.InternalKey, value []byte) error {
	return list.addInternal(key, value, ins)
}
    56  
    57  var (
    58  	probabilities [maxHeight]uint32
    59  )
    60  
    61  func init() {
    62  	p := 1.0
    63  	for i := 0; i < maxHeight; i++ {
    64  		probabilities[i] = uint32(float64(math.MaxUint32) * p)
    65  		p *= pValue
    66  	}
    67  }
    68  
    69  func NewSkiplist(arena *Arena, cmp base.Compare) *Skiplist {
    70  	skl := &Skiplist{}
    71  	skl.Reset(arena, cmp)
    72  	return skl
    73  }
    74  
    75  func (s *Skiplist) Reset(arena *Arena, cmp base.Compare) {
    76  	head, err := newRawNode(arena, maxHeight, 0, 0)
    77  	if err != nil {
    78  		panic("arenaSize is not large enough to hold the head node")
    79  	}
    80  	head.keyOffset = 0
    81  	head.skipToFirst = 0
    82  	head.skipToLast = 0
    83  
    84  	tail, err := newRawNode(arena, maxHeight, 0, 0)
    85  	if err != nil {
    86  		panic("arenaSize is not large enough to hold the tail node")
    87  	}
    88  	tail.keyOffset = 0
    89  	tail.skipToFirst = 0
    90  	tail.skipToLast = 0
    91  
    92  	headOffset := arena.getPointerOffset(unsafe.Pointer(head))
    93  	tailOffset := arena.getPointerOffset(unsafe.Pointer(tail))
    94  	for i := 0; i < maxHeight; i++ {
    95  		head.tower[i].nextOffset = tailOffset
    96  		tail.tower[i].prevOffset = headOffset
    97  	}
    98  
    99  	*s = Skiplist{
   100  		arena:  arena,
   101  		cmp:    cmp,
   102  		head:   head,
   103  		tail:   tail,
   104  		height: 1,
   105  	}
   106  }
   107  
// Height returns the current height of the skiplist, loaded atomically.
func (s *Skiplist) Height() uint32 { return atomic.LoadUint32(&s.height) }
   109  
// Arena returns the arena that backs this skiplist's storage.
func (s *Skiplist) Arena() *Arena { return s.arena }
   111  
// Size returns the number of bytes currently consumed in the arena.
func (s *Skiplist) Size() uint32 { return s.arena.Size() }
   113  
   114  func (s *Skiplist) Add(key base.InternalKey, value []byte) error {
   115  	var ins Inserter
   116  	return s.addInternal(key, value, &ins)
   117  }
   118  
// addInternal inserts key/value using the splice hints cached in ins. It
// returns ErrRecordExists if a node with the identical internal key (user key
// and trailer) already exists. The insert is lock-free: links are published
// level by level with CAS, and a failed CAS triggers a local re-search of
// that level.
func (s *Skiplist) addInternal(key base.InternalKey, value []byte, ins *Inserter) error {
	if s.findSplice(key, ins) {
		return ErrRecordExists
	}

	// Testing mode yields here to widen race windows between the search and
	// the link publication.
	if s.testing {
		runtime.Gosched()
	}

	nd, height, err := s.newNode(key, value)
	if err != nil {
		return err
	}

	ndOffset := s.arena.getPointerOffset(unsafe.Pointer(nd))

	var found bool
	var invalidateSplice bool
	for i := 0; i < int(height); i++ {
		prev := ins.spl[i].prev
		next := ins.spl[i].next

		if prev == nil {
			// findSplice only fills splices up to the list height it saw; for
			// any taller levels the whole (empty) level is the splice.
			if next != nil {
				panic("next is expected to be nil, since prev is nil")
			}

			prev = s.head
			next = s.tail
		}

		for {
			prevOffset := s.arena.getPointerOffset(unsafe.Pointer(prev))
			nextOffset := s.arena.getPointerOffset(unsafe.Pointer(next))
			nd.tower[i].init(prevOffset, nextOffset)

			// Opportunistically repair next's stale prev pointer before
			// linking; the backwards chain is only approximately maintained.
			nextPrevOffset := next.prevOffset(i)
			if nextPrevOffset != prevOffset {
				prevNextOffset := prev.nextOffset(i)
				if prevNextOffset == nextOffset {
					next.casPrevOffset(i, nextPrevOffset, prevOffset)
				}
			}

			if prev.casNextOffset(i, nextOffset, ndOffset) {
				// Forward link published. A failed prev-CAS below is fine:
				// a concurrent inserter will repair the back link.
				if s.testing {
					runtime.Gosched()
				}

				next.casPrevOffset(i, prevOffset, ndOffset)
				break
			}

			// CAS failed: another node was linked between prev and next.
			// Recompute the splice for this level and retry.
			prev, next, found = s.findSpliceForLevel(key, i, prev)
			if found {
				// A duplicate can only have appeared at the base level, since
				// this goroutine has not yet linked level 0 upward.
				if i != 0 {
					panic("how can another thread have inserted a node at a non-base level?")
				}

				return ErrRecordExists
			}
			invalidateSplice = true
		}
	}

	// Wire nd into the per-user-key skip chain so iterators can jump over
	// older entries with the same user key.
	s.setNodeSkipOffset(nd, ndOffset, key)

	if invalidateSplice {
		ins.height = 0
	} else {
		// The cached splice is still valid; advance prev to the new node so
		// the next sequential insert can reuse it.
		for i := uint32(0); i < height; i++ {
			ins.spl[i].prev = nd
		}
	}

	return nil
}
   196  
// setNodeSkipOffset links nd into the skip chain for its user key. It only
// acts when the node after nd at level 0 has the same user key with a smaller
// trailer (an older entry); the skip offsets then let iterators jump across
// the run of same-key nodes instead of walking it node by node.
func (s *Skiplist) setNodeSkipOffset(nd *node, ndOffset uint32, key base.InternalKey) {
	nextNd := s.getNext(nd, 0)
	if nextNd == s.tail {
		return
	}

	// The last 8 bytes of a stored key are the little-endian trailer.
	offset, size := nextNd.keyOffset, nextNd.keySize
	nextKey := s.arena.buf[offset : offset+size]
	n := int32(size) - 8
	if n < 0 || s.cmp(key.UserKey, nextKey[:n]) != 0 || key.Trailer <= binary.LittleEndian.Uint64(nextKey[n:]) {
		return
	}

	skipToFirstOffset := nextNd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		// nextNd already carries a skip target: nd inherits it, and the
		// target node's back link is repointed at nd.
		nd.setSkipToFirstOffset(skipToFirstOffset)

		skipToFirstNd := (*node)(s.arena.getPointer(skipToFirstOffset))
		if skipToFirstNd == s.tail {
			return
		}

		skipToFirstNd.setSkipToLastOffset(ndOffset)
	} else {
		// nextNd has no chain yet: start one with nd pointing at its older
		// same-key neighbor.
		nextNdOffset := s.arena.getPointerOffset(unsafe.Pointer(nextNd))
		nd.setSkipToFirstOffset(nextNdOffset)
	}
}
   225  
// Get looks up key and returns (value, found, kind). For a SET record the
// stored value is returned; for DELETE/PREFIXDELETE the key is reported found
// with a nil value; any other outcome yields (nil, false, Invalid).
func (s *Skiplist) Get(key []byte) ([]byte, bool, base.InternalKeyKind) {
	var nd *node
	_, nd, _ = s.seekForBaseSplice(key)
	if nd == s.tail {
		return nil, false, base.InternalKeyKindInvalid
	}

	// Stored keys are the user key followed by an 8-byte trailer; compare
	// only the user-key portion.
	b := s.arena.getBytes(nd.keyOffset, nd.keySize)
	l := len(b) - 8
	if l < 0 || s.cmp(key, b[:l:l]) != 0 {
		return nil, false, base.InternalKeyKindInvalid
	}

	// The low byte of the trailer encodes the record kind.
	kind := base.InternalKeyKind(binary.LittleEndian.Uint64(b[l:]) & 0xff)
	switch kind {
	case base.InternalKeyKindSet:
		// The value is stored in the arena immediately after the key bytes.
		value := s.arena.getBytes(nd.keyOffset+nd.keySize, nd.valueSize)
		return value, true, kind
	case base.InternalKeyKindDelete, base.InternalKeyKindPrefixDelete:
		return nil, true, kind
	default:
		return nil, false, base.InternalKeyKindInvalid
	}
}
   250  
// seekForBaseSplice descends level by level from the top of the list,
// locating the (prev, next) pair bracketing key at the base level. found is
// true if a node with an exactly matching internal key was encountered; in
// that case prev is repositioned to that node's level-0 predecessor.
func (s *Skiplist) seekForBaseSplice(key []byte) (prev, next *node, found bool) {
	ikey := base.MakeSearchKey(key)
	level := int(s.Height() - 1)

	prev = s.head
	for {
		prev, next, found = s.findSpliceForLevel(ikey, level, prev)

		if found {
			if level != 0 {
				// The match was found above the base level; its true level-0
				// predecessor is next's prev link at level 0.
				prev = s.getPrev(next, 0)
			}
			break
		}

		if level == 0 {
			break
		}

		level--
	}

	return
}
   275  
// NewIter returns an iterator over the skiplist, drawn from iterPool to avoid
// per-iterator allocation. lower and upper are stored as the iterator's
// bounds (their exact semantics are defined by Iterator).
func (s *Skiplist) NewIter(lower, upper []byte) *Iterator {
	it := iterPool.Get().(*Iterator)
	*it = Iterator{list: s, nd: s.head, lower: lower, upper: upper}
	return it
}
   281  
// NewFlushIter returns an iterator used when flushing the skiplist contents;
// bytesFlushed is the counter the flush iterator advances as it moves (see
// flushIterator).
func (s *Skiplist) NewFlushIter(bytesFlushed *uint64) base.InternalIterator {
	return &flushIterator{
		Iterator:      Iterator{list: s, nd: s.head},
		bytesIterated: bytesFlushed,
	}
}
   288  
   289  func (s *Skiplist) newNode(
   290  	key base.InternalKey, value []byte,
   291  ) (nd *node, height uint32, err error) {
   292  	height = s.randomHeight()
   293  	nd, err = newNode(s.arena, height, key, value)
   294  	if err != nil {
   295  		return
   296  	}
   297  
   298  	listHeight := s.Height()
   299  	for height > listHeight {
   300  		if atomic.CompareAndSwapUint32(&s.height, listHeight, height) {
   301  			break
   302  		}
   303  
   304  		listHeight = s.Height()
   305  	}
   306  
   307  	return
   308  }
   309  
   310  func (s *Skiplist) randomHeight() uint32 {
   311  	rnd := fastrand.Uint32()
   312  
   313  	h := uint32(1)
   314  	for h < maxHeight && rnd <= probabilities[h] {
   315  		h++
   316  	}
   317  
   318  	return h
   319  }
   320  
// findSplice fills ins.spl with the (prev, next) pair bracketing key at each
// level, returning true if the exact internal key already exists. If ins
// holds a still-valid splice from an earlier insert, it is revalidated and
// the search restarts from the lowest valid level instead of the top, which
// makes sequential inserts cheap.
func (s *Skiplist) findSplice(key base.InternalKey, ins *Inserter) (found bool) {
	listHeight := s.Height()
	var level int

	prev := s.head
	if ins.height < listHeight {
		// The list has grown since the splice was computed: recompute all
		// levels from the top.
		ins.height = listHeight
		level = int(ins.height)
	} else {
		// Look for the lowest cached level whose splice still brackets key.
		for ; level < int(listHeight); level++ {
			spl := &ins.spl[level]
			if s.getNext(spl.prev, level) != spl.next {
				// A concurrent insert broke this level's link; try the level
				// above, which changes less often.
				continue
			}
			if spl.prev != s.head && !s.keyIsAfterNode(spl.prev, key) {
				// key now sorts at or before prev: the cached splice is
				// useless, fall back to a full search.
				level = int(listHeight)
				break
			}
			if spl.next != s.tail && s.keyIsAfterNode(spl.next, key) {
				// key now sorts after next: likewise useless.
				level = int(listHeight)
				break
			}

			// This level still brackets key; descend from its prev node.
			prev = spl.prev
			break
		}
	}

	// Search downward from the chosen level, recording the splice per level.
	for level = level - 1; level >= 0; level-- {
		var next *node
		prev, next, found = s.findSpliceForLevel(key, level, prev)
		if next == nil {
			next = s.tail
		}
		ins.spl[level].init(prev, next)
	}

	return
}
   360  
// findSpliceForLevel walks the given level starting at start and returns the
// pair (prev, next) such that key belongs between them. found is true when
// next's internal key equals key exactly. Ordering is user key ascending,
// then trailer descending (a larger trailer sorts earlier).
func (s *Skiplist) findSpliceForLevel(
	key base.InternalKey, level int, start *node,
) (prev, next *node, found bool) {
	prev = start

	for {
		next = s.getNext(prev, level)
		if next == s.tail {
			// Reached the end of this level: key goes before the tail.
			break
		}

		// Stored keys are the user key followed by an 8-byte little-endian
		// trailer.
		offset, size := next.keyOffset, next.keySize
		nextKey := s.arena.buf[offset : offset+size]
		n := int32(size) - 8
		cmp := s.cmp(key.UserKey, nextKey[:n])
		if cmp < 0 {
			break
		}
		if cmp == 0 {
			// Same user key: break the tie on the trailer.
			var nextTrailer uint64
			if n >= 0 {
				nextTrailer = binary.LittleEndian.Uint64(nextKey[n:])
			} else {
				// Defensive: a key too short to hold a trailer is treated as
				// carrying an invalid-kind trailer.
				nextTrailer = uint64(base.InternalKeyKindInvalid)
			}
			if key.Trailer == nextTrailer {
				found = true
				break
			}
			if key.Trailer > nextTrailer {
				// key has the larger trailer and therefore sorts first.
				break
			}
		}

		prev = next
	}

	return
}
   400  
// keyIsAfterNode reports whether key sorts strictly after nd's key. Ordering
// is user key ascending, then trailer descending (for equal user keys, a
// smaller trailer sorts later).
//
// NOTE(review): if nd.keySize < 8 the slice ndKey[:n] below would panic with
// a negative index before the n >= 0 check is reached — callers appear to
// pass only real (non-sentinel) nodes; confirm before reusing elsewhere.
func (s *Skiplist) keyIsAfterNode(nd *node, key base.InternalKey) bool {
	ndKey := s.arena.buf[nd.keyOffset : nd.keyOffset+nd.keySize]
	// The final 8 bytes of a stored key are the little-endian trailer.
	n := int32(nd.keySize) - 8
	cmp := s.cmp(ndKey[:n], key.UserKey)
	if cmp < 0 {
		return true
	}
	if cmp > 0 {
		return false
	}
	// Equal user keys: compare trailers.
	var ndTrailer uint64
	if n >= 0 {
		ndTrailer = binary.LittleEndian.Uint64(ndKey[n:])
	} else {
		ndTrailer = uint64(base.InternalKeyKindInvalid)
	}
	if key.Trailer == ndTrailer {
		return false
	}
	return key.Trailer < ndTrailer
}
   422  
   423  func (s *Skiplist) getNext(nd *node, h int) *node {
   424  	offset := atomic.LoadUint32(&nd.tower[h].nextOffset)
   425  	return (*node)(s.arena.getPointer(offset))
   426  }
   427  
   428  func (s *Skiplist) getPrev(nd *node, h int) *node {
   429  	offset := atomic.LoadUint32(&nd.tower[h].prevOffset)
   430  	return (*node)(s.arena.getPointer(offset))
   431  }
   432  
   433  func (s *Skiplist) getSkipNext(nd *node) *node {
   434  	var nextNd *node
   435  	skipToFirstOffset := nd.skipToFirstOffset()
   436  	if skipToFirstOffset > 0 {
   437  		nextNd = (*node)(s.arena.getPointer(skipToFirstOffset))
   438  	} else {
   439  		offset := atomic.LoadUint32(&nd.tower[0].nextOffset)
   440  		nextNd = (*node)(s.arena.getPointer(offset))
   441  	}
   442  	return nextNd
   443  }
   444  
   445  func (s *Skiplist) getSkipPrev(nd *node) *node {
   446  	var prevNd *node
   447  	skipToLastOffset := nd.skipToLastOffset()
   448  	if skipToLastOffset > 0 {
   449  		prevNd = (*node)(s.arena.getPointer(skipToLastOffset))
   450  	} else {
   451  		offset := atomic.LoadUint32(&nd.tower[0].prevOffset)
   452  		prevNd = (*node)(s.arena.getPointer(offset))
   453  	}
   454  	return prevNd
   455  }