github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/bitrie.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bitpage 16 17 import ( 18 "bytes" 19 "container/list" 20 "encoding/binary" 21 "fmt" 22 "sort" 23 ) 24 25 const ( 26 BitrieVersion = 1 27 BitrieHeaderSize = 18 28 BitrieKeySize = 1 29 BitrieIndexSize = 4 30 PruneKeySize = 2 31 PruneKeyValFlag = 1 32 ) 33 34 const ( 35 InternalKindKeyPrune uint8 = 1 36 InternalKindHasValue uint8 = 2 37 InternalKindHasChildrenL1 uint8 = 4 38 InternalKindHasChildrenL2 uint8 = 8 39 InternalKindHasChildrenL3 uint8 = 16 40 41 KindChildrenL1Step uint32 = 65536 42 KindChildrenL2Step uint32 = 16777216 43 ) 44 45 type Header struct { 46 version uint16 47 reserved uint16 48 keyOffset uint16 49 indexOffset uint32 50 dataOffset uint32 51 size uint32 52 } 53 54 type Bitrie struct { 55 header Header 56 length uint32 57 data []byte 58 children map[uint8]*trienode 59 } 60 61 type trienode struct { 62 key uint8 63 prune []byte 64 value []byte 65 children map[uint8]*trienode 66 } 67 68 type disknode struct { 69 prune []byte 70 value []byte 71 childCount uint8 72 childIndex uint32 73 } 74 75 func NewBitrie() *Bitrie { 76 root := &Bitrie{ 77 header: Header{version: BitrieVersion}, 78 length: 1, 79 data: nil, 80 children: nil, 81 } 82 83 return root 84 } 85 86 func (bt *Bitrie) InitWriter() { 87 bt.length = 1 88 bt.children = make(map[uint8]*trienode, 1<<10) 89 } 90 91 func (bt *Bitrie) SetReader(d []byte, offset uint32) bool { 92 if d == nil { 93 return false 94 } 95 96 bt.data = d 97 bt.header = bt.readHeader(bt.data[offset:]) 98 99 return len(d) >= int(bt.header.size) 100 } 101 102 func (bt *Bitrie) Size() uint32 { 103 return bt.header.size 104 } 105 106 func (bt *Bitrie) Add(key []byte, value []byte) { 107 keyLength := len(key) 108 if keyLength <= 0 || len(value) <= 0 { 109 return 110 } 111 112 var ok bool 113 var childNode *trienode 114 115 children := bt.children 116 117 for i := 0; i < keyLength; i++ { 118 if childNode, ok = children[key[i]]; !ok { 119 newNode := &trienode{ 120 key: key[i], 121 prune: key[i+1:], 122 value: value, 123 children: make(map[byte]*trienode, 1<<3), 124 } 125 children[key[i]] = newNode 126 bt.length += 1 127 break 128 } else if pruneKeyLength := len(childNode.prune); pruneKeyLength > 0 { 129 m := 0 130 n := i + 1 131 for m < pruneKeyLength && n < keyLength { 132 if childNode.prune[m] != key[n] { 133 break 134 } 135 136 m++ 137 n++ 138 } 139 140 tailKeyLength := keyLength - i - 1 141 if m == 0 { 142 if pruneKeyLength > tailKeyLength { 143 if n <= keyLength-1 { 144 bt.newPruneChildByNode(childNode, m) 145 bt.newPruneChildByKey(key[n:], value, childNode) 146 childNode.prune = nil 147 bt.length += 2 148 } else if n > keyLength-1 { 149 bt.newPruneChildByNode(childNode, m) 150 childNode.prune = nil 151 childNode.value = value 152 bt.length += 1 153 } 154 } else if pruneKeyLength == tailKeyLength { 155 if n <= keyLength-1 { 156 bt.newPruneChildByNode(childNode, m) 157 bt.newPruneChildByKey(key[n:], value, childNode) 158 childNode.prune = nil 159 bt.length += 2 160 } else if n > keyLength-1 { 161 childNode.value = value 162 } 163 } else if pruneKeyLength < tailKeyLength { 164 bt.newPruneChildByNode(childNode, m) 165 bt.newPruneChildByKey(key[n:], value, childNode) 166 childNode.prune = nil 167 bt.length += 2 168 } 169 break 170 } else if m > 0 { 171 if pruneKeyLength > tailKeyLength { 172 if n <= keyLength-1 { 173 bt.newPruneChildByNode(childNode, m) 174 bt.newPruneChildByKey(key[n:], value, childNode) 175 childNode.prune = childNode.prune[:m] 176 bt.length += 2 177 } else if n > keyLength-1 { 178 bt.newPruneChildByNode(childNode, m) 179 childNode.value = value 180 childNode.prune = childNode.prune[:m] 181 bt.length += 1 182 } 183 break 184 } else if pruneKeyLength == tailKeyLength { 185 if n <= keyLength-1 { 186 bt.newPruneChildByNode(childNode, m) 187 bt.newPruneChildByKey(key[n:], value, childNode) 188 childNode.prune = childNode.prune[:m] 189 bt.length += 2 190 } else if n > keyLength-1 { 191 childNode.value = value 192 } 193 break 194 } else if pruneKeyLength < tailKeyLength { 195 if m <= pruneKeyLength-1 { 196 bt.newPruneChildByNode(childNode, m) 197 bt.newPruneChildByKey(key[n:], value, childNode) 198 childNode.prune = childNode.prune[:m] 199 bt.length += 2 200 break 201 } else if m > pruneKeyLength-1 { 202 i += m 203 } 204 } 205 } 206 } 207 208 children = childNode.children 209 } 210 } 211 212 func (bt *Bitrie) Finish() { 213 bt.children = nil 214 } 215 216 func (bt *Bitrie) Serialize( 217 tblalloc func(uint32) uint32, 218 tblbytes func(uint32, uint32) []byte, 219 tblsize func() uint32) bool { 220 if bt.length <= 0 { 221 return false 222 } 223 224 itemIndex := uint32(1) 225 226 headerOffset := tblalloc(BitrieHeaderSize + bt.length) 227 keyOffset := headerOffset + BitrieHeaderSize 228 229 idxSize := bt.length * BitrieIndexSize 230 indexOffset := tblalloc(idxSize) 231 232 dataOffset := indexOffset + idxSize 233 234 bt.header.keyOffset = uint16(keyOffset) 235 bt.header.indexOffset = indexOffset 236 bt.header.dataOffset = dataOffset 237 238 wrBuf := make([]byte, 256<<10) 239 dkNode := disknode{ 240 prune: nil, 241 value: nil, 242 childCount: 0, 243 childIndex: 0, 244 } 245 246 if len(bt.children) > 0 { 247 bt.writeKey(tblbytes(keyOffset, BitrieKeySize), 0) 248 keyOffset += BitrieKeySize 249 250 bt.writeIndex(tblbytes(indexOffset, BitrieIndexSize), dataOffset) 251 indexOffset += BitrieIndexSize 252 253 dkNode.childIndex = itemIndex 254 dkNode.childCount = uint8(len(bt.children) - 1) 255 256 wbuf, wsize := bt.writeNode(wrBuf[0:], &dkNode) 257 offset := tblalloc(wsize) 258 copy(tblbytes(offset, wsize), wbuf) 259 dataOffset += wsize 260 } else { 261 return false 262 } 263 264 Queue := list.New() 265 bt.pushQueue(Queue, bt.children) 266 267 for Queue.Len() > 0 { 268 elem := Queue.Front() 269 node := elem.Value.(*trienode) 270 271 bt.writeKey(tblbytes(keyOffset, BitrieKeySize), node.key) 272 keyOffset += BitrieKeySize 273 274 bt.writeIndex(tblbytes(indexOffset, BitrieIndexSize), dataOffset) 275 276 dkNode.prune = node.prune 277 dkNode.value = node.value 278 if len(node.children) > 0 { 279 dkNode.childIndex = itemIndex + uint32(Queue.Len()) 280 dkNode.childCount = uint8(len(node.children) - 1) 281 } else { 282 dkNode.childIndex = 0 283 dkNode.childCount = 0 284 } 285 itemIndex++ 286 287 wbuf, wsize := bt.writeNode(wrBuf[0:], &dkNode) 288 offset := tblalloc(wsize) 289 copy(tblbytes(offset, wsize), wbuf) 290 291 indexOffset += BitrieIndexSize 292 dataOffset += wsize 293 294 bt.pushQueue(Queue, node.children) 295 Queue.Remove(elem) 296 } 297 298 bt.header.size = tblsize() 299 bt.writeHeader(tblbytes(headerOffset, BitrieHeaderSize), bt.header) 300 301 return true 302 } 303 304 func (bt *Bitrie) Get(key []byte) ([]byte, bool) { 305 keyOffset := uint32(bt.header.keyOffset) 306 indexOffset := bt.header.indexOffset 307 308 node := bt.readNode(bt.data[bt.header.dataOffset:], 0) 309 childCount := node.childCount 310 childIndex := node.childIndex 311 312 keyLength := len(key) 313 for i := 0; i < keyLength; i++ { 314 tmpChildCount := uint32(childCount) 315 if childIndex > 0 { 316 tmpChildCount++ 317 } 318 319 find, childPos := bt.findNode(key[i], bt.data[keyOffset+childIndex:], tmpChildCount) 320 if find { 321 childIndex += childPos 322 nsize, offset := bt.getNodeSizeAndOffset(bt.data, indexOffset+childIndex*BitrieIndexSize) 323 node = bt.readNode(bt.data[offset:], nsize) 324 325 valueLength := len(node.value) 326 pruneLength := len(node.prune) 327 if pruneLength > 0 { 328 curKeyPos := i + 1 + pruneLength 329 if curKeyPos <= keyLength && bytes.Equal(node.prune, key[i+1:curKeyPos]) { 330 i += pruneLength 331 if i == keyLength-1 && valueLength > 0 { 332 return node.value, true 333 } else { 334 childCount = node.childCount 335 childIndex = node.childIndex 336 continue 337 } 338 } else { 339 return nil, false 340 } 341 } 342 childCount = node.childCount 343 childIndex = node.childIndex 344 345 if i == keyLength-1 && valueLength > 0 { 346 return node.value, true 347 } 348 } else { 349 return nil, false 350 } 351 } 352 353 return nil, false 354 } 355 356 func (bt *Bitrie) ToBytes() []byte { 357 buf := make([]byte, 0, 1024) 358 359 queue := list.New() 360 bt.pushQueue(queue, bt.children) 361 362 for queue.Len() > 0 { 363 elem := queue.Front() 364 node := elem.Value.(*trienode) 365 366 buf = append(buf, fmt.Sprintf("key=%c; prune=%s; value=%s; ", node.key, node.prune, node.value)...) 367 if len(node.children) > 0 { 368 buf = append(buf, fmt.Sprintf("children[%d]=[", len(node.children))...) 369 for k, _ := range node.children { 370 buf = append(buf, fmt.Sprintf("k=%c, ", k)...) 371 } 372 buf = append(buf, "]\n"...) 373 } else { 374 buf = append(buf, "children=[0]\n"...) 375 } 376 377 bt.pushQueue(queue, node.children) 378 queue.Remove(elem) 379 } 380 381 return buf 382 } 383 384 func (bt *Bitrie) AnalyzeBytes() []byte { 385 buf := make([]byte, 0, 10<<10) 386 387 keyOffset := uint32(bt.header.keyOffset) 388 indexOffset := bt.header.indexOffset 389 390 key := uint8(0) 391 offset_next := uint32(0) 392 393 count := indexOffset - keyOffset 394 buf = append(buf, fmt.Sprintf("Header version=%d; keyOffset=%d; indexOffset=%d; dataOffset=%d; itemCount=%d; size=%d\n", bt.header.version, keyOffset, indexOffset, bt.header.dataOffset, count, bt.header.size)...) 395 for i := uint32(0); i < count; i++ { 396 tmp_kpos := keyOffset + i 397 tmp_ipos := indexOffset + BitrieIndexSize*i 398 tmp_dpos := binary.BigEndian.Uint32(bt.data[tmp_ipos:]) 399 if i == count-1 { 400 offset_next = bt.header.size 401 } else { 402 offset_next = binary.BigEndian.Uint32(bt.data[tmp_ipos+BitrieIndexSize:]) 403 } 404 node := bt.readNode(bt.data[tmp_dpos:], offset_next-tmp_dpos) 405 406 key = bt.data[tmp_kpos] 407 if key == 0 { 408 key = ' ' 409 } 410 411 if len(node.prune) == 0 { 412 node.prune = []byte(" ") 413 } 414 if len(node.value) == 0 { 415 node.value = []byte(" ") 416 } 417 418 tmpChildCount := uint32(node.childCount) 419 if node.childIndex > 0 { 420 tmpChildCount++ 421 } 422 423 buf = append(buf, fmt.Sprintf("Item-%d keyOffset=%d; indexOffset=%d; dataOffset=%d; node.key=%c; node.prune=%s; node.value=%s; node.childCount=%v; node.childIndex=%v\n", i, tmp_ipos, tmp_ipos, tmp_dpos, key, node.prune, node.value, tmpChildCount, node.childIndex)...) 424 } 425 426 return buf 427 } 428 429 func (bt *Bitrie) newPruneChildByNode(node *trienode, offset int) *trienode { 430 pruneKeyLen := len(node.prune) - 1 431 if offset > pruneKeyLen { 432 return nil 433 } 434 435 newNode := &trienode{ 436 key: node.prune[offset], 437 value: node.value, 438 } 439 440 if len(node.children) > 0 { 441 newNode.children = node.children 442 node.children = make(map[byte]*trienode, 1<<3) 443 } else { 444 newNode.children = make(map[byte]*trienode, 1<<3) 445 } 446 447 node.children[newNode.key] = newNode 448 449 if offset < pruneKeyLen { 450 newNode.prune = node.prune[offset+1:] 451 } else { 452 newNode.prune = nil 453 } 454 455 node.value = nil 456 457 return newNode 458 } 459 460 func (bt *Bitrie) newPruneChildByKey(key []byte, value []byte, node *trienode) *trienode { 461 newNode := &trienode{ 462 key: key[0], 463 prune: key[1:], 464 value: value, 465 children: make(map[byte]*trienode, 1<<3), 466 } 467 468 node.children[newNode.key] = newNode 469 470 return newNode 471 } 472 473 func (bt *Bitrie) readHeader(buf []byte) Header { 474 header := Header{ 475 version: binary.BigEndian.Uint16(buf[0:]), 476 reserved: binary.BigEndian.Uint16(buf[2:]), 477 keyOffset: binary.BigEndian.Uint16(buf[4:]), 478 indexOffset: binary.BigEndian.Uint32(buf[6:]), 479 dataOffset: binary.BigEndian.Uint32(buf[10:]), 480 size: binary.BigEndian.Uint32(buf[14:]), 481 } 482 483 return header 484 } 485 486 func (bt *Bitrie) writeHeader(buf []byte, header Header) { 487 binary.BigEndian.PutUint16(buf[0:], header.version) 488 binary.BigEndian.PutUint16(buf[2:], header.reserved) 489 binary.BigEndian.PutUint16(buf[4:], header.keyOffset) 490 binary.BigEndian.PutUint32(buf[6:], header.indexOffset) 491 binary.BigEndian.PutUint32(buf[10:], header.dataOffset) 492 binary.BigEndian.PutUint32(buf[14:], header.size) 493 } 494 495 func (bt *Bitrie) getNodeSizeAndOffset(buf []byte, offset uint32) (uint32, uint32) { 496 lo := binary.BigEndian.Uint32(buf[offset:]) 497 498 var ro uint32 499 offsetNext := offset + BitrieIndexSize 500 if offsetNext < bt.header.dataOffset { 501 ro = binary.BigEndian.Uint32(buf[offsetNext:]) 502 } else { 503 ro = bt.header.size 504 } 505 506 return ro - lo, lo 507 } 508 509 func (bt *Bitrie) readNode(buf []byte, size uint32) disknode { 510 dkNode := disknode{ 511 prune: nil, 512 value: nil, 513 childCount: 0, 514 childIndex: 0, 515 } 516 517 kind := buf[0] 518 offset := uint32(1) 519 520 if kind&InternalKindKeyPrune == InternalKindKeyPrune { 521 ksize := uint32(binary.BigEndian.Uint16(buf[offset:])) 522 offset += 2 523 dkNode.prune = buf[offset : offset+ksize] 524 offset += ksize 525 } 526 527 if kind&InternalKindHasChildrenL1 == InternalKindHasChildrenL1 { 528 dkNode.childCount = buf[offset] 529 offset += 1 530 dkNode.childIndex = uint32(binary.BigEndian.Uint16(buf[offset:])) 531 offset += 2 532 } else if kind&InternalKindHasChildrenL2 == InternalKindHasChildrenL2 { 533 childIndex := binary.BigEndian.Uint32(buf[offset:]) 534 dkNode.childCount = uint8(childIndex & 0xff) 535 dkNode.childIndex = childIndex >> 8 536 offset += 4 537 } else if kind&InternalKindHasChildrenL3 == InternalKindHasChildrenL3 { 538 dkNode.childCount = buf[offset] 539 offset += 1 540 dkNode.childIndex = binary.BigEndian.Uint32(buf[offset:]) 541 offset += 4 542 } 543 544 if kind&InternalKindHasValue == InternalKindHasValue && offset < size { 545 dkNode.value = buf[offset:size] 546 } 547 548 return dkNode 549 } 550 551 func (bt *Bitrie) writeKey(buf []byte, key uint8) { 552 buf[0] = key 553 } 554 555 func (bt *Bitrie) writeIndex(buf []byte, idx uint32) { 556 binary.BigEndian.PutUint32(buf[0:], idx) 557 } 558 559 func (bt *Bitrie) writeNode(buf []byte, dkNode *disknode) ([]byte, uint32) { 560 kind := uint8(0) 561 offset := uint32(1) 562 563 pruneLength := uint32(len(dkNode.prune)) 564 if pruneLength > 0 { 565 kind |= InternalKindKeyPrune 566 binary.BigEndian.PutUint16(buf[offset:], uint16(pruneLength)) 567 offset += 2 568 copy(buf[offset:offset+pruneLength], dkNode.prune) 569 offset += pruneLength 570 } 571 572 if dkNode.childIndex > 0 { 573 if dkNode.childIndex < KindChildrenL1Step { 574 kind |= InternalKindHasChildrenL1 575 buf[offset] = dkNode.childCount 576 offset += 1 577 binary.BigEndian.PutUint16(buf[offset:], uint16(dkNode.childIndex)) 578 offset += 2 579 } else if dkNode.childIndex < KindChildrenL2Step { 580 kind |= InternalKindHasChildrenL2 581 childIndex := dkNode.childIndex<<8 | uint32(dkNode.childCount) 582 binary.BigEndian.PutUint32(buf[offset:], childIndex) 583 offset += 4 584 } else { 585 kind |= InternalKindHasChildrenL3 586 buf[offset] = dkNode.childCount 587 offset += 1 588 binary.BigEndian.PutUint32(buf[offset:], dkNode.childIndex) 589 offset += 4 590 } 591 } 592 593 valueLength := uint32(len(dkNode.value)) 594 if valueLength > 0 { 595 kind |= InternalKindHasValue 596 copy(buf[offset:offset+valueLength], dkNode.value) 597 offset += valueLength 598 } 599 600 buf[0] = kind 601 602 return buf[0:offset], offset 603 } 604 605 func (bt *Bitrie) findNode(key uint8, buf []byte, n uint32) (bool, uint32) { 606 i, j := uint32(0), n 607 for i < j { 608 h := (i + j) >> 1 609 if buf[h] < key { 610 i = h + 1 611 } else { 612 j = h 613 } 614 } 615 616 if i < n && buf[i] == key { 617 return true, i 618 } 619 620 return false, 0 621 } 622 623 func (bt *Bitrie) pushQueue(queue *list.List, children map[uint8]*trienode) { 624 childCount := len(children) 625 if childCount <= 0 { 626 return 627 } 628 629 sortedKeys := make([]int, 0, childCount) 630 631 for k, _ := range children { 632 sortedKeys = append(sortedKeys, int(k)) 633 } 634 635 sort.Ints(sortedKeys) 636 637 for _, v := range sortedKeys { 638 queue.PushBack(children[uint8(v)]) 639 } 640 }