github.com/andy-kimball/arenaskl@v0.0.0-20200617143215-f701008588b9/skl.go (about) 1 /* 2 * Copyright 2017 Dgraph Labs, Inc. and Contributors 3 * Modifications copyright (C) 2017 Andy Kimball and Contributors 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 Adapted from RocksDB inline skiplist. 20 21 Key differences: 22 - No optimization for sequential inserts (no "prev"). 23 - No custom comparator. 24 - Support overwrites. This requires care when we see the same key when inserting. 25 For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so 26 there is no need for values. We don't intend to support versioning. In-place updates of values 27 would be more efficient. 28 - We discard all non-concurrent code. 29 - We do not support Splices. This simplifies the code a lot. 30 - No AllocateNode or other pointer arithmetic. 31 - We combine the findLessThan, findGreaterOrEqual, etc into one function. 32 */ 33 34 /* 35 Further adapted from Badger: https://github.com/dgraph-io/badger. 36 37 Key differences: 38 - Support for previous pointers - doubly linked lists. Note that it's up to higher 39 level code to deal with the intermediate state that occurs during insertion, 40 where node A is linked to node B, but node B is not yet linked back to node A. 41 - Iterator includes mutator functions. 42 */ 43 44 package arenaskl 45 46 import ( 47 "bytes" 48 "errors" 49 "math" 50 "sync/atomic" 51 "unsafe" 52 53 "github.com/andy-kimball/arenaskl/internal/fastrand" 54 ) 55 56 const ( 57 maxHeight = 20 58 pValue = 1 / math.E 59 linksSize = int(unsafe.Sizeof(links{})) 60 deletedVal = 0 61 ) 62 63 const MaxNodeSize = int(unsafe.Sizeof(node{})) 64 65 var ErrRecordExists = errors.New("record with this key already exists") 66 var ErrRecordUpdated = errors.New("record was updated by another caller") 67 var ErrRecordDeleted = errors.New("record was deleted by another caller") 68 69 type Skiplist struct { 70 arena *Arena 71 head *node 72 tail *node 73 height uint32 // Current height. 1 <= height <= maxHeight. CAS. 74 75 // If set to true by tests, then extra delays are added to make it easier to 76 // detect unusual race conditions. 77 testing bool 78 } 79 80 var ( 81 probabilities [maxHeight]uint32 82 ) 83 84 func init() { 85 // Precompute the skiplist probabilities so that only a single random number 86 // needs to be generated and so that the optimal pvalue can be used (inverse 87 // of Euler's number). 88 p := float64(1.0) 89 for i := 0; i < maxHeight; i++ { 90 probabilities[i] = uint32(float64(math.MaxUint32) * p) 91 p *= pValue 92 } 93 } 94 95 // NewSkiplist constructs and initializes a new, empty skiplist. All nodes, keys, 96 // and values in the skiplist will be allocated from the given arena. 97 func NewSkiplist(arena *Arena) *Skiplist { 98 // Allocate head and tail nodes. 99 head, err := newNode(arena, maxHeight) 100 if err != nil { 101 panic("arenaSize is not large enough to hold the head node") 102 } 103 104 tail, err := newNode(arena, maxHeight) 105 if err != nil { 106 panic("arenaSize is not large enough to hold the tail node") 107 } 108 109 // Link all head/tail levels together. 110 headOffset := arena.GetPointerOffset(unsafe.Pointer(head)) 111 tailOffset := arena.GetPointerOffset(unsafe.Pointer(tail)) 112 for i := 0; i < maxHeight; i++ { 113 head.tower[i].nextOffset = tailOffset 114 tail.tower[i].prevOffset = headOffset 115 } 116 117 skl := &Skiplist{ 118 arena: arena, 119 head: head, 120 tail: tail, 121 height: 1, 122 } 123 124 return skl 125 } 126 127 // Height returns the height of the highest tower within any of the nodes that 128 // have ever been allocated as part of this skiplist. 129 func (s *Skiplist) Height() uint32 { return atomic.LoadUint32(&s.height) } 130 131 // Arena returns the arena backing this skiplist. 132 func (s *Skiplist) Arena() *Arena { return s.arena } 133 134 // Size returns the number of bytes that have allocated from the arena. 135 func (s *Skiplist) Size() uint32 { return s.arena.Size() } 136 137 func (s *Skiplist) newNode(key, val []byte, meta uint16) (nd *node, height uint32, err error) { 138 height = s.randomHeight() 139 nd, err = newNode(s.arena, height) 140 if err != nil { 141 return 142 } 143 144 // Try to increase s.height via CAS. 145 listHeight := s.Height() 146 for height > listHeight { 147 if atomic.CompareAndSwapUint32(&s.height, listHeight, height) { 148 // Successfully increased skiplist.height. 149 break 150 } 151 152 listHeight = s.Height() 153 } 154 155 // Allocate node's key and value. 156 nd.keyOffset, nd.keySize, err = s.allocKey(key) 157 if err != nil { 158 return 159 } 160 161 nd.value, err = s.allocVal(val, meta) 162 return 163 } 164 165 func (s *Skiplist) randomHeight() uint32 { 166 rnd := fastrand.Uint32() 167 h := uint32(1) 168 for h < maxHeight && rnd <= probabilities[h] { 169 h++ 170 } 171 172 return h 173 } 174 175 func (s *Skiplist) allocKey(key []byte) (keyOffset uint32, keySize uint32, err error) { 176 keySize = uint32(len(key)) 177 if keySize > math.MaxUint32 { 178 panic("key is too large") 179 } 180 181 keyOffset, err = s.arena.Alloc(keySize, 0 /* overflow */, Align1) 182 if err == nil { 183 copy(s.arena.GetBytes(keyOffset, keySize), key) 184 } 185 186 return 187 } 188 189 func (s *Skiplist) allocVal(val []byte, meta uint16) (uint64, error) { 190 if len(val) > math.MaxUint16 { 191 panic("value is too large") 192 } 193 194 valSize := uint16(len(val)) 195 valOffset, err := s.arena.Alloc(uint32(valSize), 0 /* overflow */, Align1) 196 if err != nil { 197 return 0, err 198 } 199 200 copy(s.arena.GetBytes(valOffset, uint32(valSize)), val) 201 return encodeValue(valOffset, valSize, meta), nil 202 } 203 204 func (s *Skiplist) findSpliceForLevel(key []byte, level int, start *node) (prev, next *node, found bool) { 205 prev = start 206 207 for { 208 // Assume prev.key < key. 209 next = s.getNext(prev, level) 210 nextKey := next.getKey(s.arena) 211 if nextKey == nil { 212 // Tail node key, so done. 213 break 214 } 215 216 cmp := bytes.Compare(key, nextKey) 217 if cmp == 0 { 218 // Equality case. 219 found = true 220 break 221 } 222 223 if cmp < 0 { 224 // We are done for this level, since prev.key < key < next.key. 225 break 226 } 227 228 // Keep moving right on this level. 229 prev = next 230 } 231 232 return 233 } 234 235 func (s *Skiplist) getNext(nd *node, h int) *node { 236 offset := atomic.LoadUint32(&nd.tower[h].nextOffset) 237 return (*node)(s.arena.GetPointer(offset)) 238 } 239 240 func (s *Skiplist) getPrev(nd *node, h int) *node { 241 offset := atomic.LoadUint32(&nd.tower[h].prevOffset) 242 return (*node)(s.arena.GetPointer(offset)) 243 } 244 245 func encodeValue(valOffset uint32, valSize, meta uint16) uint64 { 246 return uint64(meta)<<48 | uint64(valSize)<<32 | uint64(valOffset) 247 } 248 249 func decodeValue(value uint64) (valOffset uint32, valSize uint16) { 250 valOffset = uint32(value) 251 valSize = uint16(value >> 32) 252 return 253 } 254 255 func decodeMeta(value uint64) uint16 { 256 return uint16(value >> 48) 257 }