github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/skl.go

// Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package bitpage

import (
	"bytes"
	"encoding/binary"
	"math"
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/cockroachdb/errors"
	"github.com/zuoyebang/bitalosdb/internal/base"
	"github.com/zuoyebang/bitalosdb/internal/fastrand"
	"github.com/zuoyebang/bitalosdb/internal/hash"
)

const (
	sklVersion1 uint16 = 1
)

const (
	maxHeight   = 20
	maxNodeSize = int(unsafe.Sizeof(node{}))
	linksSize   = int(unsafe.Sizeof(links{}))
	pValue      = 1 / math.E
	indexSize   = 1 << 20
)

const (
	sklHeaderLength        = 4
	sklHeaderOffset        = tableDataOffset
	sklHeaderVersionOffset = sklHeaderOffset
	sklHeaderHeightOffset  = sklHeaderVersionOffset + 2
	sklHeadNodeOffset      = 8
	sklTailNodeOffset      = 196
)

var ErrRecordExists = errors.New("record with this key already exists")

// skl is a concurrent skiplist whose nodes live inside a table arena.
// When useMapIndex is enabled, a crc32(user key) -> node-offset cache is
// maintained to accelerate point lookups in Get.
type skl struct {
	st          *sklTable
	tbl         *table
	cmp         base.Compare
	head        *node
	tail        *node
	version     uint16
	height      uint32
	useMapIndex bool
	testing     bool
	cache       struct {
		sync.RWMutex
		index map[uint32]uint32
	}
}

// Inserter caches the splice found by a previous insert so that subsequent
// inserts of nearby keys can reuse it instead of searching from the top.
type Inserter struct {
	spl    [maxHeight]splice
	height uint32
}

func (ins *Inserter) Add(list *skl, key internalKey, value []byte) error {
	return list.addInternal(key, value, ins)
}

var (
	probabilities [maxHeight]uint32
)

// init precomputes the thresholds used by randomHeight.
func init() {
	p := float64(1.0)
	for i := 0; i < maxHeight; i++ {
		probabilities[i] = uint32(float64(math.MaxUint32) * p)
		p *= pValue
	}
}

// newSkl initializes a new skiplist in tbl: it reserves the header region and
// allocates the head/tail sentinel nodes.
func newSkl(tbl *table, st *sklTable, useMapIndex bool) (*skl, error) {
	headerOffset, err := tbl.alloc(sklHeaderLength)
	if err != nil || headerOffset != uint32(sklHeaderOffset) {
		return nil, ErrTableSize
	}

	head, err := newRawNode(tbl, maxHeight, 0, 0)
	if err != nil {
		return nil, errors.New("tblSize is not large enough to hold the head node")
	}

	tail, err := newRawNode(tbl, maxHeight, 0, 0)
	if err != nil {
		return nil, errors.New("tblSize is not large enough to hold the tail node")
	}

	head.keyOffset = 0
	tail.keyOffset = 0

	headOffset := tbl.getPointerOffset(unsafe.Pointer(head))
	tailOffset := tbl.getPointerOffset(unsafe.Pointer(tail))
	for i := 0; i < maxHeight; i++ {
		head.tower[i].nextOffset = tailOffset
		tail.tower[i].prevOffset = headOffset
	}

	sl := &skl{
		st:          st,
		tbl:         tbl,
		cmp:         bytes.Compare,
		head:        head,
		tail:        tail,
		height:      1,
		useMapIndex: useMapIndex,
	}

	sl.setHeader()

	if useMapIndex {
		sl.cache.index = make(map[uint32]uint32, indexSize)
	}

	return sl, nil
}

// openSkl attaches to a skiplist that already exists in tbl.
func openSkl(tbl *table, st *sklTable, useMapIndex bool) *skl {
	sl := &skl{
		st:          st,
		tbl:         tbl,
		cmp:         bytes.Compare,
		head:        (*node)(tbl.getPointer(sklHeadNodeOffset)),
		tail:        (*node)(tbl.getPointer(sklTailNodeOffset)),
		useMapIndex: useMapIndex,
	}

	sl.getHeader()

	if useMapIndex {
		sl.cache.index = make(map[uint32]uint32, indexSize)
	}

	return sl
}

func (s *skl) getHeader() {
	s.version = s.tbl.readAtUInt16(sklHeaderVersionOffset)
	s.height = s.getHeight()
}

func (s *skl) setHeader() {
	s.tbl.writeAtUInt16(sklVersion1, sklHeaderVersionOffset)
	s.setHeight()
}

func (s *skl) getHeight() uint32 {
	return uint32(s.tbl.readAtUInt16(sklHeaderHeightOffset))
}

func (s *skl) setHeight() {
	s.tbl.writeAtUInt16(uint16(s.Height()), sklHeaderHeightOffset)
}

func (s *skl) Height() uint32 { return atomic.LoadUint32(&s.height) }

func (s *skl) Table() *table { return s.tbl }

func (s *skl) Size() uint32 { return s.tbl.Size() }

// Get returns the value stored for key. The returned kind distinguishes a
// live value (internalKeyKindSet) from a tombstone (internalKeyKindDelete).
func (s *skl) Get(key []byte, khash uint32) ([]byte, bool, internalKeyKind) {
	var nd *node
	var kind internalKeyKind
	var beFound bool

	// Fast path: consult the hash index before walking the skiplist.
	if s.useMapIndex && s.cache.index != nil {
		s.cache.RLock()
		if ndOffset, ok := s.cache.index[khash]; ok {
			nd = (*node)(s.tbl.getPointer(ndOffset))
			if nd != s.tail {
				beFound, kind = s.compareKey(key, nd)
			}
		}
		s.cache.RUnlock()
	}

	if !beFound {
		_, nd, _ = s.seekForBaseSplice(key)
		if nd == s.tail {
			return nil, false, internalKeyKindInvalid
		}

		var exist bool = false
		exist, kind = s.compareKey(key, nd)
		if !exist {
			return nil, false, internalKeyKindInvalid
		}
	}

	if s.useMapIndex && !beFound && khash > 0 {
		s.cache.Lock()
		s.cache.index[khash] = s.tbl.getPointerOffset(unsafe.Pointer(nd))
		s.cache.Unlock()
	}

	if kind == internalKeyKindSet {
		value := s.tbl.getBytes(nd.keyOffset+nd.keySize, nd.valueSize)
		return value, true, kind
	} else if kind == internalKeyKindDelete {
		return nil, true, kind
	}

	return nil, false, internalKeyKindInvalid
}

// Add inserts key/value into the skiplist. It returns ErrRecordExists if an
// entry with the same user key and trailer is already present.
func (s *skl) Add(key internalKey, value []byte) error {
	var ins Inserter
	return s.addInternal(key, value, &ins)
}

func (s *skl) addInternal(key internalKey, value []byte, ins *Inserter) error {
	if s.findSplice(key, ins) {
		return ErrRecordExists
	}

	if s.testing {
		runtime.Gosched()
	}

	nd, height, err := s.newNode(key, value)
	if err != nil {
		return err
	}

	ndOffset := s.tbl.getPointerOffset(unsafe.Pointer(nd))

	var found bool
	var invalidateSplice bool
	for i := 0; i < int(height); i++ {
		prev := ins.spl[i].prev
		next := ins.spl[i].next

		if prev == nil {
			if next != nil {
				return errors.New("bitpage: skl next is expected to be nil, since prev is nil")
			}

			prev = s.head
			next = s.tail
		}

		// CAS the new node into level i. If the CAS on prev's next pointer
		// fails, a concurrent insert changed the splice; recompute it and retry.
		for {
			prevOffset := s.tbl.getPointerOffset(unsafe.Pointer(prev))
			nextOffset := s.tbl.getPointerOffset(unsafe.Pointer(next))
			nd.tower[i].init(prevOffset, nextOffset)

			nextPrevOffset := next.prevOffset(i)
			if nextPrevOffset != prevOffset {
				prevNextOffset := prev.nextOffset(i)
				if prevNextOffset == nextOffset {
					next.casPrevOffset(i, nextPrevOffset, prevOffset)
				}
			}

			if prev.casNextOffset(i, nextOffset, ndOffset) {
				if s.testing {
					runtime.Gosched()
				}

				next.casPrevOffset(i, prevOffset, ndOffset)
				break
			}

			prev, next, found = s.findSpliceForLevel(key, i, prev)
			if found {
				if i != 0 {
					panic("how can another thread have inserted a node at a non-base level?")
				}

				return ErrRecordExists
			}
			invalidateSplice = true
		}
	}

	s.setNodeSkipOffset(nd, ndOffset, key)

	if invalidateSplice {
		ins.height = 0
	} else {
		for i := uint32(0); i < height; i++ {
			ins.spl[i].prev = nd
		}
	}

	if s.useMapIndex && s.cache.index != nil {
		khash := hash.Crc32(key.UserKey)
		s.cache.Lock()
		s.cache.index[khash] = ndOffset
		s.cache.Unlock()
	}

	return nil
}

// setNodeSkipOffset links nd past older entries that share its user key so
// that iteration can skip them; the superseded entry's bithash value is deleted.
func (s *skl) setNodeSkipOffset(nd *node, ndOffset uint32, key internalKey) {
	nextNd := s.getNext(nd, 0)
	if nextNd == s.tail {
		return
	}

	offset, size := nextNd.keyOffset, nextNd.keySize
	nextKey := s.tbl.getBytes(offset, size)
	n := int32(size) - 8
	if n < 0 || s.cmp(key.UserKey, nextKey[:n]) != 0 {
		return
	}
	if key.Trailer <= binary.LittleEndian.Uint64(nextKey[n:]) {
		return
	}

	if s.st != nil && s.st.bp != nil {
		s.st.bp.deleteBithashKey(nextNd.getValue(s.tbl))
	}

	skipToFirstOffset := nextNd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		nd.setSkipToFirstOffset(skipToFirstOffset)

		skipToFirstNd := (*node)(s.tbl.getPointer(skipToFirstOffset))
		if skipToFirstNd == s.tail {
			return
		}

		skipToFirstNd.setSkipToLastOffset(ndOffset)
	} else {
		nextNdOffset := s.tbl.getPointerOffset(unsafe.Pointer(nextNd))
		nd.setSkipToFirstOffset(nextNdOffset)
	}
}

func (s *skl) NewIter(lower, upper []byte) *sklIterator {
	iter := &sklIterator{
		list: s,
		nd:   s.head,
	}
	return iter
}

func (s *skl) NewFlushIter() internalIterator {
	return s.NewIter(nil, nil)
}

func (s *skl) newNode(key internalKey, value []byte) (nd *node, height uint32, err error) {
	height = s.randomHeight()
	nd, err = newNode(s.tbl, height, key, value)
	if err != nil {
		return
	}

	listHeight := s.Height()
	for height > listHeight {
		if atomic.CompareAndSwapUint32(&s.height, listHeight, height) {
			s.setHeight()
			break
		}

		listHeight = s.Height()
	}

	return
}

// randomHeight draws a tower height with P(height >= h) = pValue^(h-1).
func (s *skl) randomHeight() uint32 {
	rnd := fastrand.Uint32()

	h := uint32(1)
	for h < maxHeight && rnd <= probabilities[h] {
		h++
	}

	return h
}

func (s *skl) isEmpty() bool {
	return s.getNext(s.head, 0) == s.tail
}

// findSplice computes, for each level, the pair of nodes between which key
// should be inserted, reusing any still-valid splice cached in ins. found
// reports that an entry with the same user key and trailer already exists.
func (s *skl) findSplice(key internalKey, ins *Inserter) (found bool) {
	listHeight := s.Height()
	var level int

	prev := s.head
	if ins.height < listHeight {
		ins.height = listHeight
		level = int(ins.height)
	} else {
		for ; level < int(listHeight); level++ {
			spl := &ins.spl[level]
			if s.getNext(spl.prev, level) != spl.next {
				continue
			}
			if spl.prev != s.head && !s.keyIsAfterNode(spl.prev, key) {
				level = int(listHeight)
				break
			}
			if spl.next != s.tail && s.keyIsAfterNode(spl.next, key) {
				level = int(listHeight)
				break
			}
			prev = spl.prev
			break
		}
	}

	for level = level - 1; level >= 0; level-- {
		var next *node
		prev, next, found = s.findSpliceForLevel(key, level, prev)
		if next == nil {
			next = s.tail
		}
		ins.spl[level].init(prev, next)
	}

	return
}

// findSpliceForLevel scans the given level starting at start and returns the
// (prev, next) pair bracketing key; found reports an exact match on both the
// user key and the trailer.
func (s *skl) findSpliceForLevel(
	key internalKey, level int, start *node,
) (prev, next *node, found bool) {
	prev = start

	for {
		next = s.getNext(prev, level)
		if next == s.tail {
			break
		}

		offset, size := next.keyOffset, next.keySize
		nextKey := s.tbl.getBytes(offset, size)
		n := int32(size) - 8
		cmp := s.cmp(key.UserKey, nextKey[:n])
		if cmp < 0 {
			break
		}
		if cmp == 0 {
			var nextTrailer uint64
			if n >= 0 {
				nextTrailer = binary.LittleEndian.Uint64(nextKey[n:])
			} else {
				nextTrailer = uint64(internalKeyKindInvalid)
			}
			if key.Trailer == nextTrailer {
				found = true
				break
			}
			if key.Trailer > nextTrailer {
				break
			}
		}

		prev = next
	}

	return
}

// keyIsAfterNode reports whether key sorts strictly after nd's key
// (internal key ordering: user key ascending, trailer descending).
func (s *skl) keyIsAfterNode(nd *node, key internalKey) bool {
	ndKey := s.tbl.getBytes(nd.keyOffset, nd.keySize)
	n := int32(nd.keySize) - 8
	cmp := s.cmp(ndKey[:n], key.UserKey)
	if cmp < 0 {
		return true
	}
	if cmp > 0 {
		return false
	}
	var ndTrailer uint64
	if n >= 0 {
		ndTrailer = binary.LittleEndian.Uint64(ndKey[n:])
	} else {
		ndTrailer = uint64(internalKeyKindInvalid)
	}
	if key.Trailer == ndTrailer {
		return false
	}
	return key.Trailer < ndTrailer
}

func (s *skl) getNext(nd *node, h int) *node {
	offset := atomic.LoadUint32(&nd.tower[h].nextOffset)
	return (*node)(s.tbl.getPointer(offset))
}

func (s *skl) getPrev(nd *node, h int) *node {
	offset := atomic.LoadUint32(&nd.tower[h].prevOffset)
	return (*node)(s.tbl.getPointer(offset))
}

// getSkipNext follows the skip-to-first pointer when present, otherwise the
// regular level-0 next link.
func (s *skl) getSkipNext(nd *node) *node {
	var nextNd *node
	skipToFirstOffset := nd.skipToFirstOffset()
	if skipToFirstOffset > 0 {
		nextNd = (*node)(s.tbl.getPointer(skipToFirstOffset))
	} else {
		offset := atomic.LoadUint32(&nd.tower[0].nextOffset)
		nextNd = (*node)(s.tbl.getPointer(offset))
	}
	return nextNd
}

// getSkipPrev follows the skip-to-last pointer when present, otherwise the
// regular level-0 prev link.
func (s *skl) getSkipPrev(nd *node) *node {
	var prevNd *node
	skipToLastOffset := nd.skipToLastOffset()
	if skipToLastOffset > 0 {
		prevNd = (*node)(s.tbl.getPointer(skipToLastOffset))
	} else {
		offset := atomic.LoadUint32(&nd.tower[0].prevOffset)
		prevNd = (*node)(s.tbl.getPointer(offset))
	}
	return prevNd
}

func (s *skl) compareKey(key []byte, nd *node) (bool, internalKeyKind) {
	b := s.tbl.getBytes(nd.keyOffset, nd.keySize)
	l := len(b) - 8
	if l < 0 || s.cmp(key, b[:l:l]) != 0 {
		return false, internalKeyKindInvalid
	}

	return true, internalKeyKind(binary.LittleEndian.Uint64(b[l:]) & 0xff)
}

func (s *skl) seekForBaseSplice(key []byte) (prev, next *node, found bool) {
	ikey := base.MakeSearchKey(key)
	level := int(s.Height() - 1)

	prev = s.head
	for {
		prev, next, found = s.findSpliceForLevel(ikey, level, prev)

		if found {
			if level != 0 {
				prev = s.getPrev(next, 0)
			}
			break
		}

		if level == 0 {
			break
		}

		level--
	}

	return
}
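A minimal, standalone sketch (not part of the repository) of the tower-height distribution that randomHeight implements, assuming only pValue = 1/e and maxHeight = 20 as defined in this file; math/rand stands in for the internal fastrand dependency so the snippet runs on its own:

package main

import (
	"fmt"
	"math"
	"math/rand"
)

const (
	maxHeight = 20
	pValue    = 1 / math.E
)

// probabilities[i] is the threshold below which a random uint32 grows the
// tower past level i, mirroring the init() in skl.go.
var probabilities [maxHeight]uint32

func init() {
	p := 1.0
	for i := 0; i < maxHeight; i++ {
		probabilities[i] = uint32(float64(math.MaxUint32) * p)
		p *= pValue
	}
}

// randomHeight mirrors skl.randomHeight but uses math/rand for portability.
func randomHeight() uint32 {
	rnd := rand.Uint32()
	h := uint32(1)
	for h < maxHeight && rnd <= probabilities[h] {
		h++
	}
	return h
}

func main() {
	// Sample one million heights and print the observed distribution.
	var counts [maxHeight + 1]int
	for i := 0; i < 1_000_000; i++ {
		counts[randomHeight()]++
	}
	for h := 1; h <= maxHeight; h++ {
		fmt.Printf("height %2d: %d\n", h, counts[h])
	}
}

Because each additional level is kept with probability 1/e, roughly 63% of nodes end up with height 1 and fewer than 0.1% exceed height 7, which keeps towers short while preserving logarithmic search cost.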