github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/skl.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"math"
    21  	"runtime"
    22  	"sync"
    23  	"sync/atomic"
    24  	"unsafe"
    25  
    26  	"github.com/cockroachdb/errors"
    27  	"github.com/zuoyebang/bitalosdb/internal/base"
    28  	"github.com/zuoyebang/bitalosdb/internal/fastrand"
    29  	"github.com/zuoyebang/bitalosdb/internal/hash"
    30  )
    31  
const (
	// sklVersion1 is the skl header format version written by setHeader.
	sklVersion1 uint16 = 1
)

const (
	// maxHeight is the maximum number of levels in a node's tower.
	maxHeight   = 20
	maxNodeSize = int(unsafe.Sizeof(node{}))
	linksSize   = int(unsafe.Sizeof(links{}))
	// pValue is the per-level promotion probability (1/e) used by init
	// to precompute the probabilities table.
	pValue = 1 / math.E
	// indexSize is the initial capacity of the optional key-hash ->
	// node-offset cache.
	indexSize = 1 << 20
)

const (
	// sklHeaderLength is the byte size of the header: version (2 bytes)
	// followed by height (2 bytes).
	sklHeaderLength        = 4
	sklHeaderOffset        = tableDataOffset
	sklHeaderVersionOffset = sklHeaderOffset
	sklHeaderHeightOffset  = sklHeaderVersionOffset + 2
	// sklHeadNodeOffset and sklTailNodeOffset are the fixed table offsets
	// of the head/tail sentinel nodes laid down by newSkl; openSkl relies
	// on them to relocate the sentinels.
	sklHeadNodeOffset = 8
	sklTailNodeOffset = 196
)

// ErrRecordExists is returned when inserting a key whose user key and
// trailer are both already present in the skiplist.
var ErrRecordExists = errors.New("record with this key already exists")
    54  
// skl is a concurrent skiplist whose nodes live inside an arena-style
// table; node references are stored as uint32 offsets into tbl.
type skl struct {
	st          *sklTable
	tbl         *table
	cmp         base.Compare
	head        *node // sentinel, holds no key
	tail        *node // sentinel, holds no key
	version     uint16
	height      uint32 // current list height; accessed atomically
	useMapIndex bool   // enables the key-hash -> node-offset cache below
	testing     bool   // when set, yields the CPU to widen race windows in tests
	cache       struct {
		sync.RWMutex
		index map[uint32]uint32 // key hash -> node offset in tbl
	}
}
    70  
// Inserter caches the per-level (prev, next) splice computed by a previous
// insert so that subsequent inserts of nearby keys can skip most of the
// search (see findSplice).
type Inserter struct {
	spl    [maxHeight]splice
	height uint32
}
    75  
// Add inserts key/value into list, using ins's cached splice as a search
// hint. It returns ErrRecordExists if the exact key is already present.
func (ins *Inserter) Add(list *skl, key internalKey, value []byte) error {
	return list.addInternal(key, value, ins)
}
    79  
// probabilities[h] is the fastrand threshold for growing a tower past
// height h; populated by init from pValue.
var (
	probabilities [maxHeight]uint32
)
    83  
    84  func init() {
    85  	p := float64(1.0)
    86  	for i := 0; i < maxHeight; i++ {
    87  		probabilities[i] = uint32(float64(math.MaxUint32) * p)
    88  		p *= pValue
    89  	}
    90  }
    91  
// newSkl creates a fresh skiplist inside tbl. The allocation order below is
// significant: the 4-byte header must land at sklHeaderOffset, and the two
// sentinel nodes immediately after it, so that openSkl can relocate them at
// the fixed offsets sklHeadNodeOffset and sklTailNodeOffset.
func newSkl(tbl *table, st *sklTable, useMapIndex bool) (*skl, error) {
	headerOffset, err := tbl.alloc(sklHeaderLength)
	if err != nil || headerOffset != uint32(sklHeaderOffset) {
		return nil, ErrTableSize
	}

	head, err := newRawNode(tbl, maxHeight, 0, 0)
	if err != nil {
		return nil, errors.New("tblSize is not large enough to hold the head node")
	}

	tail, err := newRawNode(tbl, maxHeight, 0, 0)
	if err != nil {
		return nil, errors.New("tblSize is not large enough to hold the tail node")
	}

	// Sentinels carry no key.
	head.keyOffset = 0
	tail.keyOffset = 0

	// Link every level of head directly to tail: the empty list.
	headOffset := tbl.getPointerOffset(unsafe.Pointer(head))
	tailOffset := tbl.getPointerOffset(unsafe.Pointer(tail))
	for i := 0; i < maxHeight; i++ {
		head.tower[i].nextOffset = tailOffset
		tail.tower[i].prevOffset = headOffset
	}

	sl := &skl{
		st:          st,
		tbl:         tbl,
		cmp:         bytes.Compare,
		head:        head,
		tail:        tail,
		height:      1,
		useMapIndex: useMapIndex,
	}

	// Persist the format version and initial height into the header.
	sl.setHeader()

	if useMapIndex {
		sl.cache.index = make(map[uint32]uint32, indexSize)
	}

	return sl, nil
}
   136  
   137  func openSkl(tbl *table, st *sklTable, useMapIndex bool) *skl {
   138  	sl := &skl{
   139  		st:          st,
   140  		tbl:         tbl,
   141  		cmp:         bytes.Compare,
   142  		head:        (*node)(tbl.getPointer(sklHeadNodeOffset)),
   143  		tail:        (*node)(tbl.getPointer(sklTailNodeOffset)),
   144  		useMapIndex: useMapIndex,
   145  	}
   146  
   147  	sl.getHeader()
   148  
   149  	if useMapIndex {
   150  		sl.cache.index = make(map[uint32]uint32, indexSize)
   151  	}
   152  
   153  	return sl
   154  }
   155  
   156  func (s *skl) getHeader() {
   157  	s.version = s.tbl.readAtUInt16(sklHeaderVersionOffset)
   158  	s.height = s.getHeight()
   159  }
   160  
   161  func (s *skl) setHeader() {
   162  	s.tbl.writeAtUInt16(sklVersion1, sklHeaderVersionOffset)
   163  	s.setHeight()
   164  }
   165  
   166  func (s *skl) getHeight() uint32 {
   167  	return uint32(s.tbl.readAtUInt16(sklHeaderHeightOffset))
   168  }
   169  
   170  func (s *skl) setHeight() {
   171  	s.tbl.writeAtUInt16(uint16(s.Height()), sklHeaderHeightOffset)
   172  }
   173  
   174  func (s *skl) Height() uint32 { return atomic.LoadUint32(&s.height) }
   175  
   176  func (s *skl) Table() *table { return s.tbl }
   177  
   178  func (s *skl) Size() uint32 { return s.tbl.Size() }
   179  
// Get looks up key and returns its value, whether a matching entry was
// found, and that entry's kind. khash is the caller-supplied hash of key,
// used to consult and populate the optional node-offset cache.
func (s *skl) Get(key []byte, khash uint32) ([]byte, bool, internalKeyKind) {
	var nd *node
	var kind internalKeyKind
	var beFound bool

	// Fast path: consult the key-hash -> node-offset cache.
	if s.useMapIndex && s.cache.index != nil {
		s.cache.RLock()
		if ndOffset, ok := s.cache.index[khash]; ok {
			nd = (*node)(s.tbl.getPointer(ndOffset))
			if nd != s.tail {
				// Hashes can collide: verify the cached node's key.
				beFound, kind = s.compareKey(key, nd)
			}
		}
		s.cache.RUnlock()
	}

	// Slow path: descend the skiplist to the base level.
	if !beFound {
		_, nd, _ = s.seekForBaseSplice(key)
		if nd == s.tail {
			return nil, false, internalKeyKindInvalid
		}

		var exist bool = false
		exist, kind = s.compareKey(key, nd)
		if !exist {
			return nil, false, internalKeyKindInvalid
		}
	}

	// Cache the node found by the slow path for later lookups.
	if s.useMapIndex && !beFound && khash > 0 {
		s.cache.Lock()
		s.cache.index[khash] = s.tbl.getPointerOffset(unsafe.Pointer(nd))
		s.cache.Unlock()
	}

	if kind == internalKeyKindSet {
		// The value bytes are stored immediately after the key bytes.
		value := s.tbl.getBytes(nd.keyOffset+nd.keySize, nd.valueSize)
		return value, true, kind
	} else if kind == internalKeyKindDelete {
		// Tombstone: the key exists but carries no value.
		return nil, true, kind
	}

	return nil, false, internalKeyKindInvalid
}
   224  
   225  func (s *skl) Add(key internalKey, value []byte) error {
   226  	var ins Inserter
   227  	return s.addInternal(key, value, &ins)
   228  }
   229  
// addInternal inserts key/value using lock-free CAS splicing, level by
// level from the base upward. ins supplies (and is updated with) the
// per-level splice. Returns ErrRecordExists if the exact key is present.
func (s *skl) addInternal(key internalKey, value []byte, ins *Inserter) error {
	if s.findSplice(key, ins) {
		return ErrRecordExists
	}

	if s.testing {
		// Widen race windows under test.
		runtime.Gosched()
	}

	nd, height, err := s.newNode(key, value)
	if err != nil {
		return err
	}

	ndOffset := s.tbl.getPointerOffset(unsafe.Pointer(nd))

	var found bool
	var invalidateSplice bool
	for i := 0; i < int(height); i++ {
		prev := ins.spl[i].prev
		next := ins.spl[i].next

		if prev == nil {
			// No cached splice at this level (node is taller than any
			// previous insert): splice between the sentinels.
			if next != nil {
				return errors.New("bitpage: skl next is expected to be nil, since prev is nil")
			}

			prev = s.head
			next = s.tail
		}

		for {
			prevOffset := s.tbl.getPointerOffset(unsafe.Pointer(prev))
			nextOffset := s.tbl.getPointerOffset(unsafe.Pointer(next))
			// Point the new node at its neighbors before publishing it.
			nd.tower[i].init(prevOffset, nextOffset)

			// Opportunistically repair next's stale prev link (a prior
			// insert may have published next before fixing back-links).
			nextPrevOffset := next.prevOffset(i)
			if nextPrevOffset != prevOffset {
				prevNextOffset := prev.nextOffset(i)
				if prevNextOffset == nextOffset {
					next.casPrevOffset(i, nextPrevOffset, prevOffset)
				}
			}

			// Publish: CAS prev's next link to the new node.
			if prev.casNextOffset(i, nextOffset, ndOffset) {
				if s.testing {
					runtime.Gosched()
				}

				// Best effort; a failure here is repaired by later inserts.
				next.casPrevOffset(i, prevOffset, ndOffset)
				break
			}

			// CAS lost to a concurrent insert: recompute the splice at
			// this level and retry.
			prev, next, found = s.findSpliceForLevel(key, i, prev)
			if found {
				if i != 0 {
					panic("how can another thread have inserted a node at a non-base level?")
				}

				return ErrRecordExists
			}
			invalidateSplice = true
		}
	}

	// Maintain same-user-key skip shortcuts for iteration.
	s.setNodeSkipOffset(nd, ndOffset, key)

	if invalidateSplice {
		// Contention made the cached splice unreliable; force a full
		// recompute on the next insert.
		ins.height = 0
	} else {
		// The new node becomes the prev hint at every level it occupies.
		for i := uint32(0); i < height; i++ {
			ins.spl[i].prev = nd
		}
	}

	if s.useMapIndex && s.cache.index != nil {
		khash := hash.Crc32(key.UserKey)
		s.cache.Lock()
		s.cache.index[khash] = ndOffset
		s.cache.Unlock()
	}

	return nil
}
   314  
// setNodeSkipOffset maintains the skip-to-first/skip-to-last shortcut links
// when nd supersedes its level-0 successor (same user key, larger trailer).
// The shortcuts appear to let iteration jump across older versions of a
// key in one hop.
func (s *skl) setNodeSkipOffset(nd *node, ndOffset uint32, key internalKey) {
	nextNd := s.getNext(nd, 0)
	if nextNd == s.tail {
		return
	}

	offset, size := nextNd.keyOffset, nextNd.keySize
	nextKey := s.tbl.getBytes(offset, size)
	// Stored keys are userKey followed by an 8-byte trailer.
	n := int32(size) - 8
	// Only applies when the successor holds the same user key...
	if n < 0 || s.cmp(key.UserKey, nextKey[:n]) != 0 {
		return
	}
	// ...and nd is the newer version (strictly larger trailer).
	if key.Trailer <= binary.LittleEndian.Uint64(nextKey[n:]) {
		return
	}

	// The superseded node's value may reference a bithash entry; delete it.
	if s.st != nil && s.st.bp != nil {
		s.st.bp.deleteBithashKey(nextNd.getValue(s.tbl))
	}

	skipToFirstOffset := nextNd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		// The successor already headed a shortcut chain: inherit its
		// first-target and point that target's last-link back at nd.
		nd.setSkipToFirstOffset(skipToFirstOffset)

		skipToFirstNd := (*node)(s.tbl.getPointer(skipToFirstOffset))
		if skipToFirstNd == s.tail {
			return
		}

		skipToFirstNd.setSkipToLastOffset(ndOffset)
	} else {
		// Start a new shortcut chain from nd over the superseded node.
		nextNdOffset := s.tbl.getPointerOffset(unsafe.Pointer(nextNd))
		nd.setSkipToFirstOffset(nextNdOffset)
	}
}
   350  
// NewIter returns an iterator positioned at the head sentinel.
// NOTE(review): lower and upper are not used here — bounds are not applied
// at construction; confirm whether sklIterator enforces them elsewhere.
func (s *skl) NewIter(lower, upper []byte) *sklIterator {
	iter := &sklIterator{
		list: s,
		nd:   s.head,
	}
	return iter
}
   358  
// NewFlushIter returns an unbounded iterator over the entire skiplist.
func (s *skl) NewFlushIter() internalIterator {
	return s.NewIter(nil, nil)
}
   362  
// newNode allocates a node with a randomized tower height. If that height
// exceeds the list's current height, the list height is raised via CAS
// (retrying against concurrent raisers) and persisted to the header.
func (s *skl) newNode(key internalKey, value []byte) (nd *node, height uint32, err error) {
	height = s.randomHeight()
	nd, err = newNode(s.tbl, height, key, value)
	if err != nil {
		return
	}

	listHeight := s.Height()
	for height > listHeight {
		if atomic.CompareAndSwapUint32(&s.height, listHeight, height) {
			// The winning CAS persists the new height to the header.
			s.setHeight()
			break
		}

		// Lost the race; reload and re-check.
		listHeight = s.Height()
	}

	return
}
   382  
   383  func (s *skl) randomHeight() uint32 {
   384  	rnd := fastrand.Uint32()
   385  
   386  	h := uint32(1)
   387  	for h < maxHeight && rnd <= probabilities[h] {
   388  		h++
   389  	}
   390  
   391  	return h
   392  }
   393  
   394  func (s *skl) isEmpty() bool {
   395  	return s.getNext(s.head, 0) == s.tail
   396  }
   397  
// findSplice computes the per-level (prev, next) splice bracketing key,
// reusing ins's cached splice from a previous insert when it is still
// valid. It returns true if a node with exactly key already exists.
func (s *skl) findSplice(key internalKey, ins *Inserter) (found bool) {
	listHeight := s.Height()
	var level int

	prev := s.head
	if ins.height < listHeight {
		// The list grew since the splice was cached; recompute from the top.
		ins.height = listHeight
		level = int(ins.height)
	} else {
		// Try to reuse the cached splice: find the lowest level whose
		// cached prev/next are still adjacent and still bracket key.
		for ; level < int(listHeight); level++ {
			spl := &ins.spl[level]
			if s.getNext(spl.prev, level) != spl.next {
				// Links changed concurrently; try one level higher.
				continue
			}
			if spl.prev != s.head && !s.keyIsAfterNode(spl.prev, key) {
				// key now sorts at/before the cached prev: full recompute.
				level = int(listHeight)
				break
			}
			if spl.next != s.tail && s.keyIsAfterNode(spl.next, key) {
				// key now sorts after the cached next: full recompute.
				level = int(listHeight)
				break
			}
			// Cached splice is usable; descend from here.
			prev = spl.prev
			break
		}
	}

	// Descend to the base level, recording the splice at each level.
	for level = level - 1; level >= 0; level-- {
		var next *node
		prev, next, found = s.findSpliceForLevel(key, level, prev)
		if next == nil {
			next = s.tail
		}
		ins.spl[level].init(prev, next)
	}

	return
}
   436  
// findSpliceForLevel scans level starting at start and returns the pair
// (prev, next) such that key belongs between them. found is true when next
// holds exactly key (same user key and same trailer). Entries with equal
// user keys are ordered by descending trailer (newer first).
func (s *skl) findSpliceForLevel(
	key internalKey, level int, start *node,
) (prev, next *node, found bool) {
	prev = start

	for {
		next = s.getNext(prev, level)
		if next == s.tail {
			// End of the level: key belongs before the tail sentinel.
			break
		}

		// Stored keys are userKey followed by an 8-byte trailer.
		// NOTE(review): if size < 8 then n is negative and nextKey[:n]
		// would panic; presumably every stored key carries the trailer —
		// confirm against the writers (the n >= 0 guard below suggests
		// shorter keys were once possible).
		offset, size := next.keyOffset, next.keySize
		nextKey := s.tbl.getBytes(offset, size)
		n := int32(size) - 8
		cmp := s.cmp(key.UserKey, nextKey[:n])
		if cmp < 0 {
			break
		}
		if cmp == 0 {
			// Same user key: compare trailers.
			var nextTrailer uint64
			if n >= 0 {
				nextTrailer = binary.LittleEndian.Uint64(nextKey[n:])
			} else {
				nextTrailer = uint64(internalKeyKindInvalid)
			}
			if key.Trailer == nextTrailer {
				// Exact duplicate.
				found = true
				break
			}
			if key.Trailer > nextTrailer {
				// Newer version sorts before older ones: stop here.
				break
			}
		}

		prev = next
	}

	return
}
   476  
   477  func (s *skl) keyIsAfterNode(nd *node, key internalKey) bool {
   478  	ndKey := s.tbl.getBytes(nd.keyOffset, nd.keySize)
   479  	n := int32(nd.keySize) - 8
   480  	cmp := s.cmp(ndKey[:n], key.UserKey)
   481  	if cmp < 0 {
   482  		return true
   483  	}
   484  	if cmp > 0 {
   485  		return false
   486  	}
   487  	var ndTrailer uint64
   488  	if n >= 0 {
   489  		ndTrailer = binary.LittleEndian.Uint64(ndKey[n:])
   490  	} else {
   491  		ndTrailer = uint64(internalKeyKindInvalid)
   492  	}
   493  	if key.Trailer == ndTrailer {
   494  		return false
   495  	}
   496  	return key.Trailer < ndTrailer
   497  }
   498  
   499  func (s *skl) getNext(nd *node, h int) *node {
   500  	offset := atomic.LoadUint32(&nd.tower[h].nextOffset)
   501  	return (*node)(s.tbl.getPointer(offset))
   502  }
   503  
   504  func (s *skl) getPrev(nd *node, h int) *node {
   505  	offset := atomic.LoadUint32(&nd.tower[h].prevOffset)
   506  	return (*node)(s.tbl.getPointer(offset))
   507  }
   508  
   509  func (s *skl) getSkipNext(nd *node) *node {
   510  	var nextNd *node
   511  	skipToFirstOffset := nd.skipToFirstOffset()
   512  	if skipToFirstOffset > 0 {
   513  		nextNd = (*node)(s.tbl.getPointer(skipToFirstOffset))
   514  	} else {
   515  		offset := atomic.LoadUint32(&nd.tower[0].nextOffset)
   516  		nextNd = (*node)(s.tbl.getPointer(offset))
   517  	}
   518  	return nextNd
   519  }
   520  
   521  func (s *skl) getSkipPrev(nd *node) *node {
   522  	var prevNd *node
   523  	skipToLastOffset := nd.skipToLastOffset()
   524  	if skipToLastOffset > 0 {
   525  		prevNd = (*node)(s.tbl.getPointer(skipToLastOffset))
   526  	} else {
   527  		offset := atomic.LoadUint32(&nd.tower[0].prevOffset)
   528  		prevNd = (*node)(s.tbl.getPointer(offset))
   529  	}
   530  	return prevNd
   531  }
   532  
   533  func (s *skl) compareKey(key []byte, nd *node) (bool, internalKeyKind) {
   534  	b := s.tbl.getBytes(nd.keyOffset, nd.keySize)
   535  	l := len(b) - 8
   536  	if l < 0 || s.cmp(key, b[:l:l]) != 0 {
   537  		return false, internalKeyKindInvalid
   538  	}
   539  
   540  	return true, internalKeyKind(binary.LittleEndian.Uint64(b[l:]) & 0xff)
   541  }
   542  
   543  func (s *skl) seekForBaseSplice(key []byte) (prev, next *node, found bool) {
   544  	ikey := base.MakeSearchKey(key)
   545  	level := int(s.Height() - 1)
   546  
   547  	prev = s.head
   548  	for {
   549  		prev, next, found = s.findSpliceForLevel(ikey, level, prev)
   550  
   551  		if found {
   552  			if level != 0 {
   553  				prev = s.getPrev(next, 0)
   554  			}
   555  			break
   556  		}
   557  
   558  		if level == 0 {
   559  			break
   560  		}
   561  
   562  		level--
   563  	}
   564  
   565  	return
   566  }