github.com/andy-kimball/arenaskl@v0.0.0-20200617143215-f701008588b9/skl.go (about)

     1  /*
     2   * Copyright 2017 Dgraph Labs, Inc. and Contributors
     3   * Modifications copyright (C) 2017 Andy Kimball and Contributors
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  /*
    19  Adapted from RocksDB inline skiplist.
    20  
    21  Key differences:
    22  - No optimization for sequential inserts (no "prev").
    23  - No custom comparator.
    24  - Support overwrites. This requires care when we see the same key when inserting.
    25    For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so
    26  	there is no need for values. We don't intend to support versioning. In-place updates of values
    27  	would be more efficient.
    28  - We discard all non-concurrent code.
    29  - We do not support Splices. This simplifies the code a lot.
    30  - No AllocateNode or other pointer arithmetic.
    31  - We combine the findLessThan, findGreaterOrEqual, etc into one function.
    32  */
    33  
    34  /*
    35  Further adapted from Badger: https://github.com/dgraph-io/badger.
    36  
    37  Key differences:
    38  - Support for previous pointers - doubly linked lists. Note that it's up to higher
    39    level code to deal with the intermediate state that occurs during insertion,
    40    where node A is linked to node B, but node B is not yet linked back to node A.
    41  - Iterator includes mutator functions.
    42  */
    43  
    44  package arenaskl
    45  
    46  import (
    47  	"bytes"
    48  	"errors"
    49  	"math"
    50  	"sync/atomic"
    51  	"unsafe"
    52  
    53  	"github.com/andy-kimball/arenaskl/internal/fastrand"
    54  )
    55  
const (
	// maxHeight is the upper bound on a node's tower height. The head and tail
	// sentinel nodes are always allocated with this height, and it sizes the
	// precomputed probabilities table.
	maxHeight  = 20
	// pValue is the inverse of Euler's number (1/e). It is the ratio between
	// the probabilities of two successive tower heights.
	pValue     = 1 / math.E
	// linksSize is the size, in bytes, of a single links struct.
	linksSize  = int(unsafe.Sizeof(links{}))
	// deletedVal is the zero value; its uses are outside this section.
	// NOTE(review): presumably the encoded value marking a deleted record —
	// confirm against the iterator code.
	deletedVal = 0
)

// MaxNodeSize is the size, in bytes, of the node struct.
const MaxNodeSize = int(unsafe.Sizeof(node{}))
    64  
    65  var ErrRecordExists = errors.New("record with this key already exists")
    66  var ErrRecordUpdated = errors.New("record was updated by another caller")
    67  var ErrRecordDeleted = errors.New("record was deleted by another caller")
    68  
// Skiplist is a skiplist whose nodes, keys, and values are all allocated from
// a single Arena. Construct one with NewSkiplist.
type Skiplist struct {
	// arena is the allocator backing every node, key, and value in the list.
	arena  *Arena
	// head and tail are sentinel nodes allocated at maxHeight; NewSkiplist
	// links them to each other at every level.
	head   *node
	tail   *node
	height uint32 // Current height. 1 <= height <= maxHeight. CAS.

	// If set to true by tests, then extra delays are added to make it easier to
	// detect unusual race conditions.
	testing bool
}
    79  
    80  var (
    81  	probabilities [maxHeight]uint32
    82  )
    83  
    84  func init() {
    85  	// Precompute the skiplist probabilities so that only a single random number
    86  	// needs to be generated and so that the optimal pvalue can be used (inverse
    87  	// of Euler's number).
    88  	p := float64(1.0)
    89  	for i := 0; i < maxHeight; i++ {
    90  		probabilities[i] = uint32(float64(math.MaxUint32) * p)
    91  		p *= pValue
    92  	}
    93  }
    94  
    95  // NewSkiplist constructs and initializes a new, empty skiplist. All nodes, keys,
    96  // and values in the skiplist will be allocated from the given arena.
    97  func NewSkiplist(arena *Arena) *Skiplist {
    98  	// Allocate head and tail nodes.
    99  	head, err := newNode(arena, maxHeight)
   100  	if err != nil {
   101  		panic("arenaSize is not large enough to hold the head node")
   102  	}
   103  
   104  	tail, err := newNode(arena, maxHeight)
   105  	if err != nil {
   106  		panic("arenaSize is not large enough to hold the tail node")
   107  	}
   108  
   109  	// Link all head/tail levels together.
   110  	headOffset := arena.GetPointerOffset(unsafe.Pointer(head))
   111  	tailOffset := arena.GetPointerOffset(unsafe.Pointer(tail))
   112  	for i := 0; i < maxHeight; i++ {
   113  		head.tower[i].nextOffset = tailOffset
   114  		tail.tower[i].prevOffset = headOffset
   115  	}
   116  
   117  	skl := &Skiplist{
   118  		arena:  arena,
   119  		head:   head,
   120  		tail:   tail,
   121  		height: 1,
   122  	}
   123  
   124  	return skl
   125  }
   126  
   127  // Height returns the height of the highest tower within any of the nodes that
   128  // have ever been allocated as part of this skiplist.
   129  func (s *Skiplist) Height() uint32 { return atomic.LoadUint32(&s.height) }
   130  
   131  // Arena returns the arena backing this skiplist.
   132  func (s *Skiplist) Arena() *Arena { return s.arena }
   133  
   134  // Size returns the number of bytes that have allocated from the arena.
   135  func (s *Skiplist) Size() uint32 { return s.arena.Size() }
   136  
   137  func (s *Skiplist) newNode(key, val []byte, meta uint16) (nd *node, height uint32, err error) {
   138  	height = s.randomHeight()
   139  	nd, err = newNode(s.arena, height)
   140  	if err != nil {
   141  		return
   142  	}
   143  
   144  	// Try to increase s.height via CAS.
   145  	listHeight := s.Height()
   146  	for height > listHeight {
   147  		if atomic.CompareAndSwapUint32(&s.height, listHeight, height) {
   148  			// Successfully increased skiplist.height.
   149  			break
   150  		}
   151  
   152  		listHeight = s.Height()
   153  	}
   154  
   155  	// Allocate node's key and value.
   156  	nd.keyOffset, nd.keySize, err = s.allocKey(key)
   157  	if err != nil {
   158  		return
   159  	}
   160  
   161  	nd.value, err = s.allocVal(val, meta)
   162  	return
   163  }
   164  
   165  func (s *Skiplist) randomHeight() uint32 {
   166  	rnd := fastrand.Uint32()
   167  	h := uint32(1)
   168  	for h < maxHeight && rnd <= probabilities[h] {
   169  		h++
   170  	}
   171  
   172  	return h
   173  }
   174  
   175  func (s *Skiplist) allocKey(key []byte) (keyOffset uint32, keySize uint32, err error) {
   176  	keySize = uint32(len(key))
   177  	if keySize > math.MaxUint32 {
   178  		panic("key is too large")
   179  	}
   180  
   181  	keyOffset, err = s.arena.Alloc(keySize, 0 /* overflow */, Align1)
   182  	if err == nil {
   183  		copy(s.arena.GetBytes(keyOffset, keySize), key)
   184  	}
   185  
   186  	return
   187  }
   188  
   189  func (s *Skiplist) allocVal(val []byte, meta uint16) (uint64, error) {
   190  	if len(val) > math.MaxUint16 {
   191  		panic("value is too large")
   192  	}
   193  
   194  	valSize := uint16(len(val))
   195  	valOffset, err := s.arena.Alloc(uint32(valSize), 0 /* overflow */, Align1)
   196  	if err != nil {
   197  		return 0, err
   198  	}
   199  
   200  	copy(s.arena.GetBytes(valOffset, uint32(valSize)), val)
   201  	return encodeValue(valOffset, valSize, meta), nil
   202  }
   203  
   204  func (s *Skiplist) findSpliceForLevel(key []byte, level int, start *node) (prev, next *node, found bool) {
   205  	prev = start
   206  
   207  	for {
   208  		// Assume prev.key < key.
   209  		next = s.getNext(prev, level)
   210  		nextKey := next.getKey(s.arena)
   211  		if nextKey == nil {
   212  			// Tail node key, so done.
   213  			break
   214  		}
   215  
   216  		cmp := bytes.Compare(key, nextKey)
   217  		if cmp == 0 {
   218  			// Equality case.
   219  			found = true
   220  			break
   221  		}
   222  
   223  		if cmp < 0 {
   224  			// We are done for this level, since prev.key < key < next.key.
   225  			break
   226  		}
   227  
   228  		// Keep moving right on this level.
   229  		prev = next
   230  	}
   231  
   232  	return
   233  }
   234  
   235  func (s *Skiplist) getNext(nd *node, h int) *node {
   236  	offset := atomic.LoadUint32(&nd.tower[h].nextOffset)
   237  	return (*node)(s.arena.GetPointer(offset))
   238  }
   239  
   240  func (s *Skiplist) getPrev(nd *node, h int) *node {
   241  	offset := atomic.LoadUint32(&nd.tower[h].prevOffset)
   242  	return (*node)(s.arena.GetPointer(offset))
   243  }
   244  
   245  func encodeValue(valOffset uint32, valSize, meta uint16) uint64 {
   246  	return uint64(meta)<<48 | uint64(valSize)<<32 | uint64(valOffset)
   247  }
   248  
   249  func decodeValue(value uint64) (valOffset uint32, valSize uint16) {
   250  	valOffset = uint32(value)
   251  	valSize = uint16(value >> 32)
   252  	return
   253  }
   254  
   255  func decodeMeta(value uint64) uint16 {
   256  	return uint16(value >> 48)
   257  }