github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/bindex/succinct_map.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bindex 16 17 import ( 18 "arena" 19 "encoding/binary" 20 "sort" 21 ) 22 23 const ( 24 SuccinctVersion = 1 25 SuccinctShardItemMax = 1024 26 SuccinctHeaderSize = 8 27 SuccinctShardSize = 8 28 SuccinctItem32Size = 8 29 SuccinctItem64Size = 12 30 ) 31 32 type SuccinctMap struct { 33 header Header 34 size uint32 35 length uint32 36 type32 bool 37 data []byte 38 data32 []Item32Array 39 data64 []Item64Array 40 arena *arena.Arena 41 } 42 43 type Header struct { 44 version uint16 45 reserved uint16 46 shards uint32 47 } 48 49 type Shard struct { 50 offset uint32 51 length uint32 52 } 53 54 type Item32 struct { 55 key uint32 56 value uint32 57 } 58 59 type Item64 struct { 60 key uint32 61 value uint64 62 } 63 64 type Item32Array []Item32 65 66 func (i32 Item32Array) Len() int { 67 return len(i32) 68 } 69 70 func (i32 Item32Array) Swap(i, j int) { 71 i32[i], i32[j] = i32[j], i32[i] 72 } 73 74 func (i32 Item32Array) Less(i, j int) bool { 75 return i32[i].key < i32[j].key 76 } 77 78 type Item64Array []Item64 79 80 func (i64 Item64Array) Len() int { 81 return len(i64) 82 } 83 84 func (i64 Item64Array) Swap(i, j int) { 85 i64[i], i64[j] = i64[j], i64[i] 86 } 87 88 func (i64 Item64Array) Less(i, j int) bool { 89 return i64[i].key < i64[j].key 90 } 91 92 func NewSuccinctMap(type32 bool) *SuccinctMap { 93 m := &SuccinctMap{ 94 header: Header{version: SuccinctVersion, reserved: 0, shards: 0}, 95 size: 0, 96 length: 0, 97 type32: type32, 98 data: nil, 99 data32: nil, 100 data64: nil, 101 arena: nil, 102 } 103 104 return m 105 } 106 107 func (s *SuccinctMap) Size() uint32 { 108 return s.size 109 } 110 111 func (s *SuccinctMap) Length() uint32 { 112 return s.length 113 } 114 115 func (s *SuccinctMap) GetData() []byte { 116 return s.data 117 } 118 119 func (s *SuccinctMap) SetReader(d []byte) bool { 120 if d == nil || len(d) <= SuccinctHeaderSize { 121 return false 122 } 123 124 s.data = d 125 s.header = s.readHeader(s.data) 126 127 return true 128 } 129 130 func (s *SuccinctMap) InitWriter(count uint32) { 131 shards := (count / SuccinctShardItemMax) + 1<<4 132 133 s.header = Header{version: SuccinctVersion, reserved: 0, shards: shards} 134 s.size = SuccinctHeaderSize + shards*SuccinctShardSize 135 s.length = count 136 s.data = nil 137 s.arena = arena.NewArena() 138 139 if s.type32 { 140 s.data32 = arena.MakeSlice[Item32Array](s.arena, int(shards), int(shards)) 141 } else { 142 s.data64 = arena.MakeSlice[Item64Array](s.arena, int(shards), int(shards)) 143 } 144 } 145 146 func (s *SuccinctMap) SetWriter(d []byte) bool { 147 if d == nil || len(d) < int(s.size) || cap(d) < int(s.size) { 148 return false 149 } 150 151 s.data = d 152 153 return true 154 } 155 156 func (s *SuccinctMap) Store(key uint32, value any) { 157 switch value.(type) { 158 case uint32: 159 if s.type32 { 160 s.store32Internal(key, value.(uint32)) 161 } 162 return 163 case uint64: 164 if !s.type32 { 165 s.store64Internal(key, value.(uint64)) 166 } 167 return 168 default: 169 return 170 } 171 } 172 173 func (s *SuccinctMap) Add(key uint32, value any) { 174 switch value.(type) { 175 case uint32: 176 if s.type32 { 177 s.add32Internal(key, value.(uint32)) 178 } 179 return 180 case uint64: 181 if !s.type32 { 182 s.add64Internal(key, value.(uint64)) 183 } 184 return 185 default: 186 return 187 } 188 } 189 190 func (s *SuccinctMap) Serialize() bool { 191 if s.type32 { 192 return s.serialize32Internal() 193 } else { 194 return s.serialize64Internal() 195 } 196 } 197 198 func (s *SuccinctMap) Load(key uint32) (any, bool) { 199 if s.type32 { 200 return s.load32Internal(key) 201 } else { 202 return s.load64Internal(key) 203 } 204 } 205 206 func (s *SuccinctMap) Get(key uint32) (any, bool) { 207 if s.type32 { 208 return s.get32Internal(key) 209 } else { 210 return s.get64Internal(key) 211 } 212 } 213 214 func (s *SuccinctMap) store32Internal(key uint32, value uint32) { 215 if s.header.shards <= 0 { 216 return 217 } 218 219 sid := key % s.header.shards 220 221 if len(s.data32[sid]) == 0 { 222 s.data32[sid] = arena.MakeSlice[Item32](s.arena, 0, SuccinctShardItemMax/2) 223 } 224 225 itemArray := &s.data32[sid] 226 227 index := sort.Search(len(*itemArray), 228 func(i int) bool { 229 var ret int 230 if (*itemArray)[i].key == key { 231 ret = 0 232 } else if (*itemArray)[i].key < key { 233 ret = -1 234 } else { 235 ret = 1 236 } 237 return ret != -1 238 }, 239 ) 240 241 exist := len(*itemArray) > 0 && index < len(*itemArray) && (*itemArray)[index].key == key 242 if !exist { 243 *itemArray = append(*itemArray, Item32{}) 244 copy((*itemArray)[index+1:], (*itemArray)[index:]) 245 } 246 247 item := &(*itemArray)[index] 248 item.key = key 249 item.value = value 250 251 s.size += SuccinctItem32Size 252 } 253 254 func (s *SuccinctMap) store64Internal(key uint32, value uint64) { 255 if s.header.shards <= 0 { 256 return 257 } 258 259 sid := key % s.header.shards 260 261 if len(s.data64[sid]) == 0 { 262 s.data64[sid] = arena.MakeSlice[Item64](s.arena, 0, SuccinctShardItemMax/2) 263 } 264 265 itemArray := &s.data64[sid] 266 267 index := sort.Search(len(*itemArray), 268 func(i int) bool { 269 var ret int 270 if (*itemArray)[i].key == key { 271 ret = 0 272 } else if (*itemArray)[i].key < key { 273 ret = -1 274 } else { 275 ret = 1 276 } 277 return ret != -1 278 }, 279 ) 280 281 exist := len(*itemArray) > 0 && index < len(*itemArray) && (*itemArray)[index].key == key 282 if !exist { 283 *itemArray = append(*itemArray, Item64{}) 284 copy((*itemArray)[index+1:], (*itemArray)[index:]) 285 } 286 287 item := &(*itemArray)[index] 288 item.key = key 289 item.value = value 290 291 s.size += SuccinctItem64Size 292 } 293 294 func (s *SuccinctMap) add32Internal(key uint32, value uint32) { 295 if s.header.shards <= 0 { 296 return 297 } 298 299 sid := key % s.header.shards 300 301 if len(s.data32[sid]) == 0 { 302 s.data32[sid] = arena.MakeSlice[Item32](s.arena, 0, SuccinctShardItemMax) 303 } 304 305 s.data32[sid] = append(s.data32[sid], Item32{key: key, value: value}) 306 307 s.size += SuccinctItem32Size 308 } 309 310 func (s *SuccinctMap) add64Internal(key uint32, value uint64) { 311 if s.header.shards <= 0 { 312 return 313 } 314 315 sid := key % s.header.shards 316 317 if len(s.data64[sid]) == 0 { 318 s.data64[sid] = arena.MakeSlice[Item64](s.arena, 0, SuccinctShardItemMax) 319 } 320 321 s.data64[sid] = append(s.data64[sid], Item64{key: key, value: value}) 322 323 s.size += SuccinctItem64Size 324 } 325 326 func (s *SuccinctMap) serialize32Internal() bool { 327 if s.size <= SuccinctHeaderSize || s.length <= 0 || len(s.data32) <= 0 { 328 return false 329 } 330 331 shardOffset := uint32(0) 332 itemOffset := SuccinctHeaderSize + s.header.shards*SuccinctShardSize 333 334 if s.data == nil { 335 s.data = arena.MakeSlice[byte](s.arena, int(s.size), int(s.size)) 336 } 337 338 s.writeHeader(s.data[shardOffset:], s.header) 339 shardOffset += SuccinctHeaderSize 340 341 for i := uint32(0); i < s.header.shards; i++ { 342 itemsLen := uint32(len(s.data32[i])) 343 s.writeShard(s.data[shardOffset:], Shard{offset: itemOffset, length: itemsLen}) 344 shardOffset += SuccinctShardSize 345 346 if itemsLen <= 0 { 347 continue 348 } 349 350 sort.Sort(s.data32[i]) 351 for j := uint32(0); j < itemsLen; j++ { 352 s.writeItem32(s.data[itemOffset:], s.data32[i][j]) 353 itemOffset += SuccinctItem32Size 354 } 355 } 356 357 return true 358 } 359 360 func (s *SuccinctMap) serialize64Internal() bool { 361 if s.size <= SuccinctHeaderSize || s.length <= 0 || len(s.data64) <= 0 { 362 return false 363 } 364 365 shardOffset := uint32(0) 366 itemOffset := SuccinctHeaderSize + s.header.shards*SuccinctShardSize 367 368 if s.data == nil { 369 s.data = arena.MakeSlice[byte](s.arena, int(s.size), int(s.size)) 370 } 371 372 s.writeHeader(s.data[shardOffset:], s.header) 373 shardOffset += SuccinctHeaderSize 374 375 for i := uint32(0); i < s.header.shards; i++ { 376 itemsLen := uint32(len(s.data64[i])) 377 s.writeShard(s.data[shardOffset:], Shard{offset: itemOffset, length: itemsLen}) 378 shardOffset += SuccinctShardSize 379 380 if itemsLen <= 0 { 381 continue 382 } 383 384 sort.Sort(s.data64[i]) 385 for j := uint32(0); j < itemsLen; j++ { 386 s.writeItem64(s.data[itemOffset:], s.data64[i][j]) 387 itemOffset += SuccinctItem64Size 388 } 389 } 390 391 return true 392 } 393 394 func (s *SuccinctMap) load32Internal(key uint32) (uint32, bool) { 395 if len(s.data32) < int(s.header.shards) || s.header.shards <= 0 { 396 return 0, false 397 } 398 399 sid := key % s.header.shards 400 401 if len(s.data32[sid]) == 0 { 402 return 0, false 403 } 404 405 itemArray := &s.data32[sid] 406 407 ok, idx := s.findItem32Arr(key, *itemArray, len(*itemArray)) 408 if !ok { 409 return 0, false 410 } 411 412 return (*itemArray)[idx].value, true 413 } 414 415 func (s *SuccinctMap) load64Internal(key uint32) (uint64, bool) { 416 if len(s.data64) < int(s.header.shards) || s.header.shards <= 0 { 417 return 0, false 418 } 419 420 sid := key % s.header.shards 421 422 if len(s.data64[sid]) == 0 { 423 return 0, false 424 } 425 426 itemArray := &s.data64[sid] 427 428 ok, idx := s.findItem64Arr(key, *itemArray, len(*itemArray)) 429 if !ok { 430 return 0, false 431 } 432 433 return (*itemArray)[idx].value, true 434 } 435 436 func (s *SuccinctMap) get32Internal(key uint32) (uint32, bool) { 437 if len(s.data) <= SuccinctHeaderSize || s.header.shards <= 0 { 438 return 0, false 439 } 440 441 sid := key % s.header.shards 442 curOffset := SuccinctHeaderSize + sid*SuccinctShardSize 443 444 shard := s.readShard(s.data[curOffset:]) 445 if shard.length <= 0 { 446 return 0, false 447 } 448 449 curOffset = shard.offset 450 451 ok, idx := s.findItem32(key, s.data[curOffset:], int(shard.length)) 452 if !ok { 453 return 0, false 454 } 455 456 curOffset += uint32(idx * SuccinctItem32Size) 457 item32 := s.readItem32(s.data[curOffset:]) 458 459 return item32.value, true 460 } 461 462 func (s *SuccinctMap) get64Internal(key uint32) (uint64, bool) { 463 if len(s.data) <= SuccinctHeaderSize || s.header.shards <= 0 { 464 return 0, false 465 } 466 467 sid := key % s.header.shards 468 curOffset := SuccinctHeaderSize + sid*SuccinctShardSize 469 470 shard := s.readShard(s.data[curOffset:]) 471 if shard.length <= 0 { 472 return 0, false 473 } 474 475 curOffset = shard.offset 476 477 ok, idx := s.findItem64(key, s.data[curOffset:], int(shard.length)) 478 if !ok { 479 return 0, false 480 } 481 482 curOffset += uint32(idx * SuccinctItem64Size) 483 item64 := s.readItem64(s.data[curOffset:]) 484 485 return item64.value, true 486 } 487 488 func (s *SuccinctMap) Finish() { 489 s.size = SuccinctHeaderSize 490 s.length = 0 491 s.data32 = nil 492 s.data64 = nil 493 if s.arena != nil { 494 s.arena.Free() 495 s.arena = nil 496 } 497 } 498 499 func (s *SuccinctMap) writeHeader(buf []byte, header Header) { 500 binary.BigEndian.PutUint16(buf[0:], header.version) 501 binary.BigEndian.PutUint16(buf[2:], header.reserved) 502 binary.BigEndian.PutUint32(buf[4:], header.shards) 503 } 504 505 func (s *SuccinctMap) writeShard(buf []byte, shard Shard) { 506 binary.BigEndian.PutUint32(buf[0:], shard.offset) 507 binary.BigEndian.PutUint32(buf[4:], shard.length) 508 } 509 510 func (s *SuccinctMap) writeItem32(buf []byte, item32 Item32) { 511 binary.BigEndian.PutUint32(buf[0:], item32.key) 512 binary.BigEndian.PutUint32(buf[4:], item32.value) 513 } 514 515 func (s *SuccinctMap) writeItem64(buf []byte, item64 Item64) { 516 binary.BigEndian.PutUint32(buf[0:], item64.key) 517 binary.BigEndian.PutUint64(buf[4:], item64.value) 518 } 519 520 func (s *SuccinctMap) readHeader(buf []byte) Header { 521 header := Header{ 522 version: binary.BigEndian.Uint16(buf[0:]), 523 reserved: binary.BigEndian.Uint16(buf[2:]), 524 shards: binary.BigEndian.Uint32(buf[4:]), 525 } 526 527 return header 528 } 529 530 func (s *SuccinctMap) readShard(buf []byte) Shard { 531 shard := Shard{ 532 offset: binary.BigEndian.Uint32(buf[0:]), 533 length: binary.BigEndian.Uint32(buf[4:]), 534 } 535 536 return shard 537 } 538 539 func (s *SuccinctMap) readItem32(buf []byte) Item32 { 540 item32 := Item32{ 541 key: binary.BigEndian.Uint32(buf[0:]), 542 value: binary.BigEndian.Uint32(buf[4:]), 543 } 544 545 return item32 546 } 547 548 func (s *SuccinctMap) readItem64(buf []byte) Item64 { 549 item64 := Item64{ 550 key: binary.BigEndian.Uint32(buf[0:]), 551 value: binary.BigEndian.Uint64(buf[4:]), 552 } 553 554 return item64 555 } 556 557 func (s *SuccinctMap) findItem32(key uint32, buf []byte, n int) (bool, int) { 558 i, j := 0, n 559 for i < j { 560 h := int(uint(i+j) >> 1) 561 if binary.BigEndian.Uint32(buf[SuccinctItem32Size*h:]) < key { 562 i = h + 1 563 } else { 564 j = h 565 } 566 } 567 568 if i < n && binary.BigEndian.Uint32(buf[SuccinctItem32Size*i:]) == key { 569 return true, i 570 } 571 572 return false, 0 573 } 574 575 func (s *SuccinctMap) findItem32Arr(key uint32, arr Item32Array, n int) (bool, int) { 576 i, j := 0, n 577 for i < j { 578 h := int(uint(i+j) >> 1) 579 if arr[h].key < key { 580 i = h + 1 581 } else { 582 j = h 583 } 584 } 585 586 if i < n && arr[i].key == key { 587 return true, i 588 } 589 590 return false, 0 591 } 592 593 func (s *SuccinctMap) findItem64Arr(key uint32, arr Item64Array, n int) (bool, int) { 594 i, j := 0, n 595 for i < j { 596 h := int(uint(i+j) >> 1) 597 if arr[h].key < key { 598 i = h + 1 599 } else { 600 j = h 601 } 602 } 603 604 if i < n && arr[i].key == key { 605 return true, i 606 } 607 608 return false, 0 609 } 610 611 func (s *SuccinctMap) findItem64(key uint32, buf []byte, n int) (bool, int) { 612 i, j := 0, n 613 for i < j { 614 h := int(uint(i+j) >> 1) 615 if binary.BigEndian.Uint32(buf[SuccinctItem64Size*h:]) < key { 616 i = h + 1 617 } else { 618 j = h 619 } 620 } 621 622 if i < n && binary.BigEndian.Uint32(buf[SuccinctItem64Size*i:]) == key { 623 return true, i 624 } 625 626 return false, 0 627 }