github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/louds_sparse.go (about) 1 package surf 2 3 import ( 4 "bytes" 5 "io" 6 ) 7 8 type loudsSparse struct { 9 height uint32 10 startLevel uint32 11 denseNodeCount uint32 12 denseChildCount uint32 13 14 labelVec labelVector 15 hasChildVec rankVectorSparse 16 loudsVec selectVector 17 suffixes suffixVector 18 values valueVector 19 prefixVec prefixVector 20 } 21 22 func (ls *loudsSparse) Init(builder *Builder) *loudsSparse { 23 ls.height = uint32(len(builder.lsLabels)) 24 ls.startLevel = builder.sparseStartLevel 25 26 for l := 0; uint32(l) < ls.startLevel; l++ { 27 ls.denseNodeCount += builder.nodeCounts[l] 28 } 29 30 if ls.startLevel != 0 { 31 ls.denseChildCount = ls.denseNodeCount + builder.nodeCounts[ls.startLevel] - 1 32 } 33 34 ls.labelVec.Init(builder.lsLabels, ls.startLevel, ls.height) 35 36 numItemsPerLevel := make([]uint32, ls.sparseLevels()) 37 for level := range numItemsPerLevel { 38 numItemsPerLevel[level] = uint32(len(builder.lsLabels[int(ls.startLevel)+level])) 39 } 40 ls.hasChildVec.Init(builder.lsHasChild[ls.startLevel:], numItemsPerLevel) 41 ls.loudsVec.Init(builder.lsLoudsBits[ls.startLevel:], numItemsPerLevel) 42 43 if builder.suffixLen() != 0 { 44 hashLen := builder.hashSuffixLen 45 realLen := builder.realSuffixLen 46 suffixLen := hashLen + realLen 47 numSuffixBitsPerLevel := make([]uint32, ls.sparseLevels()) 48 for i := range numSuffixBitsPerLevel { 49 numSuffixBitsPerLevel[i] = builder.suffixCounts[int(ls.startLevel)+i] * suffixLen 50 } 51 ls.suffixes.Init(hashLen, realLen, builder.suffixes[ls.startLevel:], numSuffixBitsPerLevel) 52 } 53 54 ls.values.Init(builder.values[ls.startLevel:], builder.valueSize) 55 ls.prefixVec.Init(builder.hasPrefix[ls.startLevel:], builder.nodeCounts[ls.startLevel:], builder.prefixes[ls.startLevel:]) 56 57 return ls 58 } 59 60 func (ls *loudsSparse) Get(key []byte, startDepth, nodeID uint32) (value []byte, ok bool) { 61 var ( 62 pos = ls.firstLabelPos(nodeID) 63 depth uint32 64 prefixLen uint32 65 ) 66 for depth = startDepth; depth < uint32(len(key)); depth++ { 67 prefixLen, ok = ls.prefixVec.CheckPrefix(key, depth, ls.prefixID(nodeID)) 68 if !ok { 69 return nil, false 70 } 71 depth += prefixLen 72 73 if depth >= uint32(len(key)) { 74 break 75 } 76 77 if pos, ok = ls.labelVec.Search(key[depth], pos, ls.nodeSize(pos)); !ok { 78 return nil, false 79 } 80 81 if !ls.hasChildVec.IsSet(pos) { 82 valPos := ls.suffixPos(pos) 83 if ok = ls.suffixes.CheckEquality(valPos, key, depth+1); ok { 84 value = ls.values.Get(valPos) 85 } 86 return value, ok 87 } 88 89 nodeID = ls.childNodeID(pos) 90 pos = ls.firstLabelPos(nodeID) 91 } 92 93 if ls.labelVec.GetLabel(pos) == labelTerminator && !ls.hasChildVec.IsSet(pos) { 94 valPos := ls.suffixPos(pos) 95 if ok = ls.suffixes.CheckEquality(valPos, key, depth+1); ok { 96 value = ls.values.Get(valPos) 97 } 98 return value, ok 99 } 100 101 return nil, false 102 } 103 104 func (ls *loudsSparse) MarshalSize() int64 { 105 return align(ls.rawMarshalSize()) 106 } 107 108 func (ls *loudsSparse) rawMarshalSize() int64 { 109 return 4*4 + ls.labelVec.MarshalSize() + ls.hasChildVec.MarshalSize() + ls.loudsVec.MarshalSize() + 110 ls.suffixes.MarshalSize() + ls.prefixVec.MarshalSize() 111 } 112 113 func (ls *loudsSparse) WriteTo(w io.Writer) error { 114 var bs [4]byte 115 endian.PutUint32(bs[:], ls.height) 116 if _, err := w.Write(bs[:]); err != nil { 117 return err 118 } 119 endian.PutUint32(bs[:], ls.startLevel) 120 if _, err := w.Write(bs[:]); err != nil { 121 return err 122 } 123 endian.PutUint32(bs[:], ls.denseNodeCount) 124 if _, err := w.Write(bs[:]); err != nil { 125 return err 126 } 127 endian.PutUint32(bs[:], ls.denseChildCount) 128 if _, err := w.Write(bs[:]); err != nil { 129 return err 130 } 131 if err := ls.labelVec.WriteTo(w); err != nil { 132 return err 133 } 134 if err := ls.hasChildVec.WriteTo(w); err != nil { 135 return err 136 } 137 if err := ls.loudsVec.WriteTo(w); err != nil { 138 return err 139 } 140 if err := ls.suffixes.WriteTo(w); err != nil { 141 return err 142 } 143 if err := ls.prefixVec.WriteTo(w); err != nil { 144 return err 145 } 146 147 padding := ls.MarshalSize() - ls.rawMarshalSize() 148 var zeros [8]byte 149 _, err := w.Write(zeros[:padding]) 150 return err 151 } 152 153 func (ls *loudsSparse) Unmarshal(buf []byte) []byte { 154 buf1 := buf 155 ls.height = endian.Uint32(buf1) 156 buf1 = buf1[4:] 157 ls.startLevel = endian.Uint32(buf1) 158 buf1 = buf1[4:] 159 ls.denseNodeCount = endian.Uint32(buf1) 160 buf1 = buf1[4:] 161 ls.denseChildCount = endian.Uint32(buf1) 162 buf1 = buf1[4:] 163 164 buf1 = ls.labelVec.Unmarshal(buf1) 165 buf1 = ls.hasChildVec.Unmarshal(buf1) 166 buf1 = ls.loudsVec.Unmarshal(buf1) 167 buf1 = ls.suffixes.Unmarshal(buf1) 168 buf1 = ls.prefixVec.Unmarshal(buf1) 169 170 sz := align(int64(len(buf) - len(buf1))) 171 return buf[sz:] 172 } 173 174 func (ls *loudsSparse) suffixPos(pos uint32) uint32 { 175 return pos - ls.hasChildVec.Rank(pos) 176 } 177 178 func (ls *loudsSparse) firstLabelPos(nodeID uint32) uint32 { 179 return ls.loudsVec.Select(nodeID + 1 - ls.denseNodeCount) 180 } 181 182 func (ls *loudsSparse) sparseLevels() uint32 { 183 return ls.height - ls.startLevel 184 } 185 func (ls *loudsSparse) prefixID(nodeID uint32) uint32 { 186 return nodeID - ls.denseNodeCount 187 } 188 189 func (ls *loudsSparse) lastLabelPos(nodeID uint32) uint32 { 190 nextRank := nodeID + 2 - ls.denseNodeCount 191 if nextRank > ls.loudsVec.numOnes { 192 return ls.loudsVec.numBits - 1 193 } 194 return ls.loudsVec.Select(nextRank) - 1 195 } 196 197 func (ls *loudsSparse) childNodeID(pos uint32) uint32 { 198 return ls.hasChildVec.Rank(pos) + ls.denseChildCount 199 } 200 201 func (ls *loudsSparse) nodeSize(pos uint32) uint32 { 202 return ls.loudsVec.DistanceToNextSetBit(pos) 203 } 204 205 func (ls *loudsSparse) isEndOfNode(pos uint32) bool { 206 return pos == ls.loudsVec.numBits-1 || ls.loudsVec.IsSet(pos+1) 207 } 208 209 type sparseIter struct { 210 valid bool 211 atTerminator bool 212 ls *loudsSparse 213 startLevel uint32 214 startNodeID uint32 215 startDepth uint32 216 level uint32 217 keyBuf []byte 218 posInTrie []uint32 219 nodeID []uint32 220 prefixLen []uint32 221 } 222 223 func (it *sparseIter) Init(ls *loudsSparse) { 224 it.ls = ls 225 it.startLevel = ls.startLevel 226 it.posInTrie = make([]uint32, ls.height-ls.startLevel) 227 it.prefixLen = make([]uint32, ls.height-ls.startLevel) 228 it.nodeID = make([]uint32, ls.height-ls.startLevel) 229 } 230 231 func (it *sparseIter) Next() { 232 it.atTerminator = false 233 pos := it.posInTrie[it.level] + 1 234 nodeID := it.nodeID[it.level] 235 236 for pos >= it.ls.loudsVec.numBits || it.ls.loudsVec.IsSet(pos) { 237 if it.level == 0 { 238 it.valid = false 239 it.keyBuf = it.keyBuf[:0] 240 return 241 } 242 it.level-- 243 pos = it.posInTrie[it.level] + 1 244 nodeID = it.nodeID[it.level] 245 } 246 it.setAt(it.level, pos, nodeID) 247 it.MoveToLeftMostKey() 248 } 249 250 func (it *sparseIter) Prev() { 251 it.atTerminator = false 252 pos := it.posInTrie[it.level] 253 nodeID := it.nodeID[it.level] 254 255 if pos == 0 { 256 it.valid = false 257 return 258 } 259 for it.ls.loudsVec.IsSet(pos) { 260 if it.level == 0 { 261 it.valid = false 262 it.keyBuf = it.keyBuf[:0] 263 return 264 } 265 it.level-- 266 pos = it.posInTrie[it.level] 267 nodeID = it.nodeID[it.level] 268 } 269 it.setAt(it.level, pos-1, nodeID) 270 it.MoveToRightMostKey() 271 } 272 273 func (it *sparseIter) Seek(key []byte) bool { 274 nodeID := it.startNodeID 275 pos := it.ls.firstLabelPos(nodeID) 276 var ok bool 277 depth := it.startDepth 278 279 for it.level = 0; it.level < it.ls.sparseLevels(); it.level++ { 280 prefix := it.ls.prefixVec.GetPrefix(it.ls.prefixID(nodeID)) 281 var prefixCmp int 282 if len(prefix) != 0 { 283 end := int(depth) + len(prefix) 284 if end > len(key) { 285 end = len(key) 286 } 287 prefixCmp = bytes.Compare(prefix, key[depth:end]) 288 } 289 290 if prefixCmp < 0 { 291 if it.level == 0 { 292 it.valid = false 293 return false 294 } 295 it.level-- 296 it.Next() 297 return false 298 } 299 300 depth += uint32(len(prefix)) 301 if depth >= uint32(len(key)) || prefixCmp > 0 { 302 it.append(it.ls.labelVec.GetLabel(pos), pos, nodeID) 303 it.MoveToLeftMostKey() 304 return false 305 } 306 307 nodeSize := it.ls.nodeSize(pos) 308 pos, ok = it.ls.labelVec.Search(key[depth], pos, nodeSize) 309 if !ok { 310 it.moveToLeftInNextSubTrie(pos, nodeID, nodeSize, key[depth]) 311 return false 312 } 313 314 it.append(key[depth], pos, nodeID) 315 316 if !it.ls.hasChildVec.IsSet(pos) { 317 return it.compareSuffixGreaterThan(key, pos, depth+1) 318 } 319 320 nodeID = it.ls.childNodeID(pos) 321 pos = it.ls.firstLabelPos(nodeID) 322 depth++ 323 } 324 325 if it.ls.labelVec.GetLabel(pos) == labelTerminator && !it.ls.hasChildVec.IsSet(pos) && !it.ls.isEndOfNode(pos) { 326 it.append(labelTerminator, pos, nodeID) 327 it.atTerminator = true 328 it.valid = true 329 return false 330 } 331 332 if uint32(len(key)) <= depth { 333 it.MoveToLeftMostKey() 334 return false 335 } 336 337 it.valid = true 338 return true 339 } 340 341 func (it *sparseIter) Key() []byte { 342 if it.atTerminator { 343 return it.keyBuf[:len(it.keyBuf)-1] 344 } 345 return it.keyBuf 346 } 347 348 func (it *sparseIter) Value() []byte { 349 valPos := it.ls.suffixPos(it.posInTrie[it.level]) 350 return it.ls.values.Get(valPos) 351 } 352 353 func (it *sparseIter) Compare(key []byte) int { 354 itKey := it.Key() 355 startDepth := int(it.startDepth) 356 if startDepth > len(key) { 357 panic("dense compare have bug") 358 } 359 if startDepth == len(key) { 360 if len(itKey) == 0 { 361 return 0 362 } 363 return 1 364 } 365 cmpKey := key[startDepth:] 366 367 cmpLen := len(itKey) 368 if cmpLen > len(cmpKey) { 369 cmpLen = len(cmpKey) 370 } 371 cmp := bytes.Compare(itKey[:cmpLen], cmpKey[:cmpLen]) 372 if cmp != 0 { 373 return cmp 374 } 375 if len(itKey) > len(cmpKey) { 376 return 1 377 } 378 if len(itKey) == len(cmpKey) && it.atTerminator { 379 return 0 380 } 381 suffixPos := it.ls.suffixPos(it.posInTrie[it.level]) 382 return it.ls.suffixes.Compare(key, suffixPos, uint32(len(itKey)+startDepth)) 383 } 384 385 func (it *sparseIter) Reset() { 386 it.valid = false 387 it.level = 0 388 it.atTerminator = false 389 it.keyBuf = it.keyBuf[:0] 390 } 391 392 func (it *sparseIter) MoveToLeftMostKey() { 393 if len(it.keyBuf) == 0 { 394 pos := it.ls.firstLabelPos(it.startNodeID) 395 label := it.ls.labelVec.GetLabel(pos) 396 it.append(label, pos, it.startNodeID) 397 } 398 399 pos := it.posInTrie[it.level] 400 label := it.ls.labelVec.GetLabel(pos) 401 402 if !it.ls.hasChildVec.IsSet(pos) { 403 if label == labelTerminator && !it.ls.isEndOfNode(pos) { 404 it.atTerminator = true 405 } 406 it.valid = true 407 return 408 } 409 410 for it.level < it.ls.sparseLevels() { 411 it.level++ 412 nodeID := it.ls.childNodeID(pos) 413 pos = it.ls.firstLabelPos(nodeID) 414 label = it.ls.labelVec.GetLabel(pos) 415 416 if !it.ls.hasChildVec.IsSet(pos) { 417 it.append(label, pos, nodeID) 418 if label == labelTerminator && !it.ls.isEndOfNode(pos) { 419 it.atTerminator = true 420 } 421 it.valid = true 422 return 423 } 424 it.append(label, pos, nodeID) 425 } 426 panic("unreachable") 427 } 428 429 func (it *sparseIter) MoveToRightMostKey() { 430 if len(it.keyBuf) == 0 { 431 pos := it.ls.lastLabelPos(it.startNodeID) 432 label := it.ls.labelVec.GetLabel(pos) 433 it.append(label, pos, it.startNodeID) 434 } 435 436 pos := it.posInTrie[it.level] 437 label := it.ls.labelVec.GetLabel(pos) 438 439 if !it.ls.hasChildVec.IsSet(pos) { 440 if label == labelTerminator && !it.ls.isEndOfNode(pos) { 441 it.atTerminator = true 442 } 443 it.valid = true 444 return 445 } 446 447 for it.level < it.ls.sparseLevels() { 448 it.level++ 449 nodeID := it.ls.childNodeID(pos) 450 pos = it.ls.lastLabelPos(nodeID) 451 label = it.ls.labelVec.GetLabel(pos) 452 453 if !it.ls.hasChildVec.IsSet(pos) { 454 it.append(label, pos, nodeID) 455 if label == labelTerminator && !it.ls.isEndOfNode(pos) { 456 it.atTerminator = true 457 } 458 it.valid = true 459 return 460 } 461 it.append(label, pos, nodeID) 462 } 463 panic("unreachable") 464 } 465 466 func (it *sparseIter) SetToFirstInRoot() { 467 it.append(it.ls.labelVec.GetLabel(0), 0, it.startNodeID) 468 } 469 470 func (it *sparseIter) SetToLastInRoot() { 471 pos := it.ls.lastLabelPos(0) 472 it.append(it.ls.labelVec.GetLabel(pos), pos, it.startNodeID) 473 } 474 475 func (it *sparseIter) append(label byte, pos, nodeID uint32) { 476 prefix := it.ls.prefixVec.GetPrefix(it.ls.prefixID(nodeID)) 477 it.keyBuf = append(it.keyBuf, prefix...) 478 it.keyBuf = append(it.keyBuf, label) 479 it.posInTrie[it.level] = pos 480 it.prefixLen[it.level] = uint32(len(prefix)) + 1 481 if it.level != 0 { 482 it.prefixLen[it.level] += it.prefixLen[it.level-1] 483 } 484 it.nodeID[it.level] = nodeID 485 } 486 487 func (it *sparseIter) setAt(level, pos, nodeID uint32) { 488 it.keyBuf = append(it.keyBuf[:it.prefixLen[level]-1], it.ls.labelVec.GetLabel(pos)) 489 it.posInTrie[it.level] = pos 490 } 491 492 func (it *sparseIter) truncate(level uint32) { 493 it.keyBuf = it.keyBuf[:it.prefixLen[level]] 494 } 495 496 func (it *sparseIter) moveToLeftInNextSubTrie(pos, nodeID, nodeSize uint32, label byte) { 497 pos, ok := it.ls.labelVec.SearchGreaterThan(label, pos, nodeSize) 498 it.append(it.ls.labelVec.GetLabel(pos), pos, nodeID) 499 if ok { 500 it.MoveToLeftMostKey() 501 } else { 502 it.Next() 503 } 504 } 505 506 func (it *sparseIter) compareSuffixGreaterThan(key []byte, pos, level uint32) bool { 507 cmp := it.ls.suffixes.Compare(key, it.ls.suffixPos(pos), level) 508 if cmp < 0 { 509 it.Next() 510 return false 511 } 512 it.valid = true 513 return cmp == couldBePositive 514 }