github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/arenaskl/skl.go

// Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package arenaskl

import (
	"encoding/binary"
	"math"
	"runtime"
	"sync/atomic"
	"unsafe"

	"github.com/zuoyebang/bitalosdb/internal/base"
	"github.com/zuoyebang/bitalosdb/internal/fastrand"

	"github.com/cockroachdb/errors"
)

const (
	maxHeight   = 20
	maxNodeSize = int(unsafe.Sizeof(node{}))
	linksSize   = int(unsafe.Sizeof(links{}))
	pValue      = 1 / math.E
)

// ErrRecordExists is returned by Add when an entry with the same internal
// key (user key and trailer) is already present in the skiplist.
var ErrRecordExists = errors.New("record with this key already exists")

// Skiplist is a lock-free skiplist whose nodes are allocated from an Arena.
// head and tail are sentinel nodes linked at every level, so interior links
// never need nil checks.
type Skiplist struct {
	arena   *Arena
	cmp     base.Compare
	head    *node
	tail    *node
	height  uint32
	testing bool
}

// Inserter caches the splice (the prev/next pair per level) computed by the
// last insert, so a sequence of nearby inserts can skip part of the search.
type Inserter struct {
	spl    [maxHeight]splice
	height uint32
}

func (ins *Inserter) Add(list *Skiplist, key base.InternalKey, value []byte) error {
	return list.addInternal(key, value, ins)
}

// probabilities[h] holds the threshold below which a random uint32 promotes
// a node to height h+1; init fills it with successive powers of pValue.
var (
	probabilities [maxHeight]uint32
)

func init() {
	p := 1.0
	for i := 0; i < maxHeight; i++ {
		probabilities[i] = uint32(float64(math.MaxUint32) * p)
		p *= pValue
	}
}

func NewSkiplist(arena *Arena, cmp base.Compare) *Skiplist {
	skl := &Skiplist{}
	skl.Reset(arena, cmp)
	return skl
}

// Reset re-initializes the skiplist over a (possibly recycled) arena,
// allocating fresh head and tail sentinels and linking them at every level.
func (s *Skiplist) Reset(arena *Arena, cmp base.Compare) {
	head, err := newRawNode(arena, maxHeight, 0, 0)
	if err != nil {
		panic("arenaSize is not large enough to hold the head node")
	}
	head.keyOffset = 0
	head.skipToFirst = 0
	head.skipToLast = 0

	tail, err := newRawNode(arena, maxHeight, 0, 0)
	if err != nil {
		panic("arenaSize is not large enough to hold the tail node")
	}
	tail.keyOffset = 0
	tail.skipToFirst = 0
	tail.skipToLast = 0

	headOffset := arena.getPointerOffset(unsafe.Pointer(head))
	tailOffset := arena.getPointerOffset(unsafe.Pointer(tail))
	for i := 0; i < maxHeight; i++ {
		head.tower[i].nextOffset = tailOffset
		tail.tower[i].prevOffset = headOffset
	}

	*s = Skiplist{
		arena:  arena,
		cmp:    cmp,
		head:   head,
		tail:   tail,
		height: 1,
	}
}

func (s *Skiplist) Height() uint32 { return atomic.LoadUint32(&s.height) }

func (s *Skiplist) Arena() *Arena { return s.arena }

func (s *Skiplist) Size() uint32 { return s.arena.Size() }

func (s *Skiplist) Add(key base.InternalKey, value []byte) error {
	var ins Inserter
	return s.addInternal(key, value, &ins)
}

func (s *Skiplist) addInternal(key base.InternalKey, value []byte, ins *Inserter) error {
	if s.findSplice(key, ins) {
		return ErrRecordExists
	}

	if s.testing {
		// Add delay to make it easier to test races between this thread
		// and another thread that sees the intermediate state between
		// finding the splice and using it.
		runtime.Gosched()
	}

	nd, height, err := s.newNode(key, value)
	if err != nil {
		return err
	}

	ndOffset := s.arena.getPointerOffset(unsafe.Pointer(nd))

	var found bool
	var invalidateSplice bool
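	// Descriptive note on the lock-free linking below (added commentary,
	// not new behavior). At each level up to nd's random height:
	//
	//   1. nd.tower[i] is pointed at the candidate prev/next pair.
	//   2. If next's prev pointer lags behind (another insert completed
	//      step 3 but not step 4), it is repaired with a CAS first.
	//   3. prev's next pointer is CASed from next to nd; success makes nd
	//      reachable at level i.
	//   4. next's prev pointer is CASed from prev to nd; failure here is
	//      benign, because a later insert repairs it as in step 2.
	//
	// If the CAS in step 3 fails, another insert raced us at this level,
	// so the splice is recomputed from prev and the loop retries.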
	for i := 0; i < int(height); i++ {
		prev := ins.spl[i].prev
		next := ins.spl[i].next

		if prev == nil {
			// The new node increased the height of the skiplist, so the
			// splice at this level is between head and tail.
			if next != nil {
				panic("next is expected to be nil, since prev is nil")
			}

			prev = s.head
			next = s.tail
		}

		for {
			prevOffset := s.arena.getPointerOffset(unsafe.Pointer(prev))
			nextOffset := s.arena.getPointerOffset(unsafe.Pointer(next))
			nd.tower[i].init(prevOffset, nextOffset)

			nextPrevOffset := next.prevOffset(i)
			if nextPrevOffset != prevOffset {
				prevNextOffset := prev.nextOffset(i)
				if prevNextOffset == nextOffset {
					next.casPrevOffset(i, nextPrevOffset, prevOffset)
				}
			}

			if prev.casNextOffset(i, nextOffset, ndOffset) {
				if s.testing {
					runtime.Gosched()
				}

				next.casPrevOffset(i, prevOffset, ndOffset)
				break
			}

			prev, next, found = s.findSpliceForLevel(key, i, prev)
			if found {
				if i != 0 {
					panic("how can another thread have inserted a node at a non-base level?")
				}

				return ErrRecordExists
			}
			invalidateSplice = true
		}
	}

	s.setNodeSkipOffset(nd, ndOffset, key)

	if invalidateSplice {
		ins.height = 0
	} else {
		for i := uint32(0); i < height; i++ {
			ins.spl[i].prev = nd
		}
	}

	return nil
}

// setNodeSkipOffset links nd into the run of entries that share its user
// key: nd has just been inserted in front of nextNd, and if nextNd is an
// older version of the same user key, nd inherits (or starts) the run's
// skip-to-first offset; when the run already existed, its last entry is
// pointed back at nd.
func (s *Skiplist) setNodeSkipOffset(nd *node, ndOffset uint32, key base.InternalKey) {
	nextNd := s.getNext(nd, 0)
	if nextNd == s.tail {
		return
	}

	// The last 8 bytes of the stored key hold the trailer; require the same
	// user key and a strictly newer trailer on nd before linking.
	offset, size := nextNd.keyOffset, nextNd.keySize
	nextKey := s.arena.buf[offset : offset+size]
	n := int32(size) - 8
	if n < 0 || s.cmp(key.UserKey, nextKey[:n]) != 0 || key.Trailer <= binary.LittleEndian.Uint64(nextKey[n:]) {
		return
	}

	skipToFirstOffset := nextNd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		nd.setSkipToFirstOffset(skipToFirstOffset)

		skipToFirstNd := (*node)(s.arena.getPointer(skipToFirstOffset))
		if skipToFirstNd == s.tail {
			return
		}

		skipToFirstNd.setSkipToLastOffset(ndOffset)
	} else {
		nextNdOffset := s.arena.getPointerOffset(unsafe.Pointer(nextNd))
		nd.setSkipToFirstOffset(nextNdOffset)
	}
}

// Get returns the value of the newest entry for key, reporting whether the
// key was found and the kind recorded in its trailer.
func (s *Skiplist) Get(key []byte) ([]byte, bool, base.InternalKeyKind) {
	var nd *node
	_, nd, _ = s.seekForBaseSplice(key)
	if nd == s.tail {
		return nil, false, base.InternalKeyKindInvalid
	}

	b := s.arena.getBytes(nd.keyOffset, nd.keySize)
	l := len(b) - 8
	if l < 0 || s.cmp(key, b[:l:l]) != 0 {
		return nil, false, base.InternalKeyKindInvalid
	}

	// The low byte of the trailer encodes the kind.
	kind := base.InternalKeyKind(binary.LittleEndian.Uint64(b[l:]) & 0xff)
	switch kind {
	case base.InternalKeyKindSet:
		value := s.arena.getBytes(nd.keyOffset+nd.keySize, nd.valueSize)
		return value, true, kind
	case base.InternalKeyKindDelete, base.InternalKeyKindPrefixDelete:
		return nil, true, kind
	default:
		return nil, false, base.InternalKeyKindInvalid
	}
}

func (s *Skiplist) seekForBaseSplice(key []byte) (prev, next *node, found bool) {
	ikey := base.MakeSearchKey(key)
	level := int(s.Height() - 1)

	prev = s.head
	for {
		prev, next, found = s.findSpliceForLevel(ikey, level, prev)

		if found {
			if level != 0 {
				// next points at the target node, but prev must be found
				// on the bottom level.
				prev = s.getPrev(next, 0)
			}
			break
		}

		if level == 0 {
			break
		}

		level--
	}

	return
}
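// A hedged usage sketch for the write/read path above (illustrative only,
// not part of this file; it assumes NewArena takes a backing []byte and that
// base.MakeInternalKey(userKey, seqNum, kind) exists as in upstream
// pebble-style base packages — verify both against this module before use;
// bytes.Compare satisfies base.Compare):
//
//	arena := NewArena(make([]byte, 1<<20))
//	skl := NewSkiplist(arena, bytes.Compare)
//
//	ikey := base.MakeInternalKey([]byte("user-key"), 1, base.InternalKeyKindSet)
//	if err := skl.Add(ikey, []byte("value")); err == ErrRecordExists {
//		// An entry with the same user key and trailer already exists.
//	}
//
//	if v, ok, kind := skl.Get([]byte("user-key")); ok && kind == base.InternalKeyKindSet {
//		_ = v // "value"
//	}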
func (s *Skiplist) NewIter(lower, upper []byte) *Iterator {
	it := iterPool.Get().(*Iterator)
	*it = Iterator{list: s, nd: s.head, lower: lower, upper: upper}
	return it
}

func (s *Skiplist) NewFlushIter(bytesFlushed *uint64) base.InternalIterator {
	return &flushIterator{
		Iterator:      Iterator{list: s, nd: s.head},
		bytesIterated: bytesFlushed,
	}
}

func (s *Skiplist) newNode(
	key base.InternalKey, value []byte,
) (nd *node, height uint32, err error) {
	height = s.randomHeight()
	nd, err = newNode(s.arena, height, key, value)
	if err != nil {
		return
	}

	// Try to increase s.height via CAS; retry if another insert raised it
	// concurrently.
	listHeight := s.Height()
	for height > listHeight {
		if atomic.CompareAndSwapUint32(&s.height, listHeight, height) {
			break
		}

		listHeight = s.Height()
	}

	return
}

func (s *Skiplist) randomHeight() uint32 {
	rnd := fastrand.Uint32()

	h := uint32(1)
	for h < maxHeight && rnd <= probabilities[h] {
		h++
	}

	return h
}

func (s *Skiplist) findSplice(key base.InternalKey, ins *Inserter) (found bool) {
	listHeight := s.Height()
	var level int

	prev := s.head
	if ins.height < listHeight {
		// The cached splice was computed at a lower height, so recompute
		// from the top.
		ins.height = listHeight
		level = int(ins.height)
	} else {
		// The cached height matches the list height; validate each cached
		// splice before reusing it.
		for ; level < int(listHeight); level++ {
			spl := &ins.spl[level]
			if s.getNext(spl.prev, level) != spl.next {
				// One or more nodes have been inserted inside the splice
				// at this level.
				continue
			}
			if spl.prev != s.head && !s.keyIsAfterNode(spl.prev, key) {
				// Key lies before the splice.
				level = int(listHeight)
				break
			}
			if spl.next != s.tail && s.keyIsAfterNode(spl.next, key) {
				// Key lies after the splice.
				level = int(listHeight)
				break
			}

			// The splice brackets the key.
			prev = spl.prev
			break
		}
	}

	for level = level - 1; level >= 0; level-- {
		var next *node
		prev, next, found = s.findSpliceForLevel(key, level, prev)
		if next == nil {
			next = s.tail
		}
		ins.spl[level].init(prev, next)
	}

	return
}

// findSpliceForLevel scans the given level starting at start and returns the
// pair of nodes that bracket key; found reports an exact match (same user
// key and trailer). Entries with equal user keys are ordered newest-first,
// so a larger trailer stops the scan earlier.
func (s *Skiplist) findSpliceForLevel(
	key base.InternalKey, level int, start *node,
) (prev, next *node, found bool) {
	prev = start

	for {
		next = s.getNext(prev, level)
		if next == s.tail {
			// Tail node, so done.
			break
		}

		offset, size := next.keyOffset, next.keySize
		nextKey := s.arena.buf[offset : offset+size]
		n := int32(size) - 8
		cmp := s.cmp(key.UserKey, nextKey[:n])
		if cmp < 0 {
			// Done at this level: prev.key < key < next.key.
			break
		}
		if cmp == 0 {
			// User keys are equal; fall back to the trailer.
			var nextTrailer uint64
			if n >= 0 {
				nextTrailer = binary.LittleEndian.Uint64(nextKey[n:])
			} else {
				nextTrailer = uint64(base.InternalKeyKindInvalid)
			}
			if key.Trailer == nextTrailer {
				// Internal key equality.
				found = true
				break
			}
			if key.Trailer > nextTrailer {
				// Done at this level: a larger trailer sorts earlier.
				break
			}
		}

		// Keep moving right at this level.
		prev = next
	}

	return
}

// keyIsAfterNode reports whether key sorts strictly after nd's key.
func (s *Skiplist) keyIsAfterNode(nd *node, key base.InternalKey) bool {
	ndKey := s.arena.buf[nd.keyOffset : nd.keyOffset+nd.keySize]
	n := int32(nd.keySize) - 8
	cmp := s.cmp(ndKey[:n], key.UserKey)
	if cmp < 0 {
		return true
	}
	if cmp > 0 {
		return false
	}
	// User keys are equal; a larger trailer means a newer entry, which
	// sorts earlier.
	var ndTrailer uint64
	if n >= 0 {
		ndTrailer = binary.LittleEndian.Uint64(ndKey[n:])
	} else {
		ndTrailer = uint64(base.InternalKeyKindInvalid)
	}
	if key.Trailer == ndTrailer {
		return false
	}
	return key.Trailer < ndTrailer
}

func (s *Skiplist) getNext(nd *node, h int) *node {
	offset := atomic.LoadUint32(&nd.tower[h].nextOffset)
	return (*node)(s.arena.getPointer(offset))
}

func (s *Skiplist) getPrev(nd *node, h int) *node {
	offset := atomic.LoadUint32(&nd.tower[h].prevOffset)
	return (*node)(s.arena.getPointer(offset))
}
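// getSkipNext and getSkipPrev below consume the offsets maintained by
// setNodeSkipOffset. A sketch of a run of three versions of one user key at
// level 0 (newest sorts first; added commentary, inferred from the code):
//
//	head -> k@3 -> k@2 -> k@1 -> tail
//	        skipToFirst on k@3 reaches k@1
//	        skipToLast on k@1 reaches k@3
//
// so an iterator positioned on the head of the run can hop straight to its
// last entry (and back) instead of visiting every intermediate version.
// When a node carries no skip offset, both helpers fall back to the plain
// level-0 links.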
func (s *Skiplist) getSkipNext(nd *node) *node {
	var nextNd *node
	skipToFirstOffset := nd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		nextNd = (*node)(s.arena.getPointer(skipToFirstOffset))
	} else {
		offset := atomic.LoadUint32(&nd.tower[0].nextOffset)
		nextNd = (*node)(s.arena.getPointer(offset))
	}
	return nextNd
}

func (s *Skiplist) getSkipPrev(nd *node) *node {
	var prevNd *node
	skipToLastOffset := nd.skipToLastOffset()
	if skipToLastOffset > 0 {
		prevNd = (*node)(s.arena.getPointer(skipToLastOffset))
	} else {
		offset := atomic.LoadUint32(&nd.tower[0].prevOffset)
		prevNd = (*node)(s.arena.getPointer(offset))
	}
	return prevNd
}
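// A hedged sketch of reusing an Inserter across sorted inserts (illustrative
// caller code, not part of this file; NewArena and base.MakeInternalKey are
// assumptions, as in the sketch after seekForBaseSplice):
//
//	skl := NewSkiplist(NewArena(make([]byte, 1<<20)), bytes.Compare)
//
//	var ins Inserter
//	for i, k := range [][]byte{[]byte("a"), []byte("b"), []byte("c")} {
//		ikey := base.MakeInternalKey(k, uint64(i+1), base.InternalKeyKindSet)
//		// ins caches the splice from the previous insert; findSplice then
//		// revalidates it instead of descending from the top level.
//		if err := ins.Add(skl, ikey, []byte("v")); err != nil {
//			panic(err)
//		}
//	}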