github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/batchskl/skl.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package batchskl 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "fmt" 21 "math" 22 "time" 23 "unsafe" 24 25 "github.com/cockroachdb/errors" 26 "github.com/zuoyebang/bitalosdb/internal/base" 27 "golang.org/x/exp/rand" 28 ) 29 30 const ( 31 maxHeight = 20 32 maxNodeSize = int(unsafe.Sizeof(node{})) 33 linksSize = int(unsafe.Sizeof(links{})) 34 ) 35 36 var ErrExists = errors.New("record with this key already exists") 37 38 type links struct { 39 next uint32 40 prev uint32 41 } 42 43 type node struct { 44 offset uint32 45 keyStart uint32 46 keyEnd uint32 47 abbreviatedKey uint64 48 links [maxHeight]links 49 } 50 51 type Skiplist struct { 52 storage *[]byte 53 cmp base.Compare 54 abbreviatedKey base.AbbreviatedKey 55 nodes []byte 56 head uint32 57 tail uint32 58 height uint32 59 rand rand.PCGSource 60 } 61 62 var ( 63 probabilities [maxHeight]uint32 64 ) 65 66 func init() { 67 const pValue = 1 / math.E 68 69 p := float64(1.0) 70 for i := 0; i < maxHeight; i++ { 71 probabilities[i] = uint32(float64(math.MaxUint32) * p) 72 p *= pValue 73 } 74 } 75 76 func NewSkiplist(storage *[]byte, cmp base.Compare, abbreviatedKey base.AbbreviatedKey) *Skiplist { 77 s := &Skiplist{} 78 s.Init(storage, cmp, abbreviatedKey) 79 return s 80 } 81 82 func (s *Skiplist) Reset() { 83 *s = Skiplist{ 84 nodes: s.nodes[:0], 85 height: 1, 86 } 87 const batchMaxRetainedSize = 1 << 20 88 if cap(s.nodes) > batchMaxRetainedSize { 89 s.nodes = nil 90 } 91 } 92 93 func (s *Skiplist) Init(storage *[]byte, cmp base.Compare, abbreviatedKey base.AbbreviatedKey) { 94 *s = Skiplist{ 95 storage: storage, 96 cmp: cmp, 97 abbreviatedKey: abbreviatedKey, 98 nodes: s.nodes[:0], 99 height: 1, 100 } 101 s.rand.Seed(uint64(time.Now().UnixNano())) 102 103 const initBufSize = 256 104 if cap(s.nodes) < initBufSize { 105 s.nodes = make([]byte, 0, initBufSize) 106 } 107 108 s.head = s.newNode(maxHeight, 0, 0, 0, 0) 109 s.tail = s.newNode(maxHeight, 0, 0, 0, 0) 110 111 headNode := s.node(s.head) 112 tailNode := s.node(s.tail) 113 for i := uint32(0); i < maxHeight; i++ { 114 headNode.links[i].next = s.tail 115 tailNode.links[i].prev = s.head 116 } 117 } 118 119 func (s *Skiplist) Add(keyOffset uint32) error { 120 data := (*s.storage)[keyOffset+1:] 121 v, n := binary.Uvarint(data) 122 if n <= 0 { 123 return errors.Errorf("corrupted batch entry: %d", errors.Safe(keyOffset)) 124 } 125 data = data[n:] 126 if v > uint64(len(data)) { 127 return errors.Errorf("corrupted batch entry: %d", errors.Safe(keyOffset)) 128 } 129 keyStart := 1 + keyOffset + uint32(n) 130 keyEnd := keyStart + uint32(v) 131 key := data[:v] 132 abbreviatedKey := s.abbreviatedKey(key) 133 134 var spl [maxHeight]splice 135 136 prev := s.getPrev(s.tail, 0) 137 if prevNode := s.node(prev); prev == s.head || 138 abbreviatedKey > prevNode.abbreviatedKey || 139 (abbreviatedKey == prevNode.abbreviatedKey && 140 s.cmp(key, (*s.storage)[prevNode.keyStart:prevNode.keyEnd]) > 0) { 141 for level := uint32(0); level < s.height; level++ { 142 spl[level].prev = s.getPrev(s.tail, level) 143 spl[level].next = s.tail 144 } 145 } else { 146 s.findSplice(key, abbreviatedKey, &spl) 147 } 148 149 height := s.randomHeight() 150 for ; s.height < height; s.height++ { 151 spl[s.height].next = s.tail 152 spl[s.height].prev = s.head 153 } 154 155 nd := s.newNode(height, keyOffset, keyStart, keyEnd, abbreviatedKey) 156 newNode := s.node(nd) 157 for level := uint32(0); level < height; level++ { 158 next := spl[level].next 159 prev := spl[level].prev 160 newNode.links[level].next = next 161 newNode.links[level].prev = prev 162 s.node(next).links[level].prev = nd 163 s.node(prev).links[level].next = nd 164 } 165 166 return nil 167 } 168 169 func (s *Skiplist) NewIter(lower, upper []byte) Iterator { 170 return Iterator{list: s, lower: lower, upper: upper} 171 } 172 173 func (s *Skiplist) newNode(height, 174 offset, keyStart, keyEnd uint32, abbreviatedKey uint64) uint32 { 175 if height < 1 || height > maxHeight { 176 panic("height cannot be less than one or greater than the max height") 177 } 178 179 unusedSize := (maxHeight - int(height)) * linksSize 180 nodeOffset := s.alloc(uint32(maxNodeSize - unusedSize)) 181 nd := s.node(nodeOffset) 182 183 nd.offset = offset 184 nd.keyStart = keyStart 185 nd.keyEnd = keyEnd 186 nd.abbreviatedKey = abbreviatedKey 187 return nodeOffset 188 } 189 190 func (s *Skiplist) alloc(size uint32) uint32 { 191 offset := len(s.nodes) 192 193 minAllocSize := offset + maxNodeSize 194 if cap(s.nodes) < minAllocSize { 195 allocSize := cap(s.nodes) * 2 196 if allocSize < minAllocSize { 197 allocSize = minAllocSize 198 } 199 tmp := make([]byte, len(s.nodes), allocSize) 200 copy(tmp, s.nodes) 201 s.nodes = tmp 202 } 203 204 newSize := uint32(offset) + size 205 s.nodes = s.nodes[:newSize] 206 return uint32(offset) 207 } 208 209 func (s *Skiplist) node(offset uint32) *node { 210 return (*node)(unsafe.Pointer(&s.nodes[offset])) 211 } 212 213 func (s *Skiplist) randomHeight() uint32 { 214 rnd := uint32(s.rand.Uint64()) 215 h := uint32(1) 216 for h < maxHeight && rnd <= probabilities[h] { 217 h++ 218 } 219 return h 220 } 221 222 func (s *Skiplist) findSplice(key []byte, abbreviatedKey uint64, spl *[maxHeight]splice) { 223 prev := s.head 224 225 for level := s.height - 1; ; level-- { 226 next := s.getNext(prev, level) 227 for next != s.tail { 228 nextNode := s.node(next) 229 nextAbbreviatedKey := nextNode.abbreviatedKey 230 if abbreviatedKey < nextAbbreviatedKey { 231 break 232 } 233 if abbreviatedKey == nextAbbreviatedKey { 234 if s.cmp(key, (*s.storage)[nextNode.keyStart:nextNode.keyEnd]) <= 0 { 235 break 236 } 237 } 238 prev = next 239 next = nextNode.links[level].next 240 } 241 242 spl[level].prev = prev 243 spl[level].next = next 244 if level == 0 { 245 break 246 } 247 } 248 } 249 250 func (s *Skiplist) findSpliceForLevel( 251 key []byte, abbreviatedKey uint64, level, start uint32, 252 ) (prev, next uint32) { 253 prev = start 254 next = s.getNext(prev, level) 255 256 for next != s.tail { 257 nextNode := s.node(next) 258 nextAbbreviatedKey := nextNode.abbreviatedKey 259 if abbreviatedKey < nextAbbreviatedKey { 260 break 261 } 262 if abbreviatedKey == nextAbbreviatedKey { 263 if s.cmp(key, (*s.storage)[nextNode.keyStart:nextNode.keyEnd]) <= 0 { 264 break 265 } 266 } 267 268 prev = next 269 next = nextNode.links[level].next 270 } 271 272 return 273 } 274 275 func (s *Skiplist) getKey(nd uint32) base.InternalKey { 276 n := s.node(nd) 277 kind := base.InternalKeyKind((*s.storage)[n.offset]) 278 key := (*s.storage)[n.keyStart:n.keyEnd] 279 return base.MakeInternalKey(key, uint64(n.offset)|base.InternalKeySeqNumBatch, kind) 280 } 281 282 func (s *Skiplist) getNext(nd, h uint32) uint32 { 283 return s.node(nd).links[h].next 284 } 285 286 func (s *Skiplist) getPrev(nd, h uint32) uint32 { 287 return s.node(nd).links[h].prev 288 } 289 290 func (s *Skiplist) debug() string { 291 var buf bytes.Buffer 292 for level := uint32(0); level < s.height; level++ { 293 var count int 294 for nd := s.head; nd != s.tail; nd = s.getNext(nd, level) { 295 count++ 296 } 297 fmt.Fprintf(&buf, "%d: %d\n", level, count) 298 } 299 return buf.String() 300 } 301 302 var _ = (*Skiplist).debug