github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/hashprobe/hashprobe.go (about) 1 // Package hashprobe provides implementations of probing tables for various 2 // data types. 3 // 4 // Probing tables are specialized hash tables supporting only a single 5 // "probing" operation which behave like a "lookup or insert". When a key 6 // is probed, either its value is retrieved if it already existed in the table, 7 // or it is inserted and assigned its index in the insert sequence as value. 8 // 9 // Values are represented as signed 32 bits integers, which means that probing 10 // tables defined in this package may contain at most 2^31-1 entries. 11 // 12 // Probing tables have a method named Probe with the following signature: 13 // 14 // func (t *Int64Table) Probe(keys []int64, values []int32) int { 15 // ... 16 // } 17 // 18 // The method takes an array of keys to probe as first argument, an array of 19 // values where the indexes of each key will be written as second argument, and 20 // returns the number of keys that were inserted during the call. 21 // 22 // Applications that need to determine which keys were inserted can capture the 23 // length of the probing table prior to the call, and scan the list of values 24 // looking for indexes greater or equal to the length of the table before the 25 // call. 26 package hashprobe 27 28 import ( 29 cryptoRand "crypto/rand" 30 "encoding/binary" 31 "math" 32 "math/bits" 33 "math/rand" 34 "sync" 35 36 "github.com/segmentio/parquet-go/hashprobe/aeshash" 37 "github.com/segmentio/parquet-go/hashprobe/wyhash" 38 "github.com/segmentio/parquet-go/internal/unsafecast" 39 "github.com/segmentio/parquet-go/sparse" 40 ) 41 42 const ( 43 // Number of probes tested per iteration. This parameter balances between 44 // the amount of memory allocated on the stack to hold the computed hashes 45 // of the keys being probed, and amortizing the baseline cost of the probing 46 // algorithm. 47 // 48 // The larger the value, the more memory is required, but lower the baseline 49 // cost will be. 50 // 51 // We chose a value that is somewhat large, resulting in reserving 2KiB of 52 // stack but mostly erasing the baseline cost. 53 probesPerLoop = 256 54 ) 55 56 var ( 57 prngSeed [8]byte 58 prngMutex sync.Mutex 59 prngSource rand.Source64 60 ) 61 62 func init() { 63 _, err := cryptoRand.Read(prngSeed[:]) 64 if err != nil { 65 panic("cannot seed random number generator from system source: " + err.Error()) 66 } 67 seed := int64(binary.LittleEndian.Uint64(prngSeed[:])) 68 prngSource = rand.NewSource(seed).(rand.Source64) 69 } 70 71 func tableSizeAndMaxLen(groupSize, numValues int, maxLoad float64) (size, maxLen int) { 72 n := int(math.Ceil((1 / maxLoad) * float64(numValues))) 73 size = nextPowerOf2((n + (groupSize - 1)) / groupSize) 74 maxLen = int(math.Ceil(maxLoad * float64(groupSize*size))) 75 return 76 } 77 78 func nextPowerOf2(n int) int { 79 return 1 << (64 - bits.LeadingZeros64(uint64(n-1))) 80 } 81 82 func randSeed() uintptr { 83 prngMutex.Lock() 84 defer prngMutex.Unlock() 85 return uintptr(prngSource.Uint64()) 86 } 87 88 type Int32Table struct{ table32 } 89 90 func NewInt32Table(cap int, maxLoad float64) *Int32Table { 91 return &Int32Table{makeTable32(cap, maxLoad)} 92 } 93 94 func (t *Int32Table) Reset() { t.reset() } 95 96 func (t *Int32Table) Len() int { return t.len } 97 98 func (t *Int32Table) Cap() int { return t.size() } 99 100 func (t *Int32Table) Probe(keys, values []int32) int { 101 return t.probe(unsafecast.Int32ToUint32(keys), values) 102 } 103 104 func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int { 105 return t.probeArray(keys.Uint32Array(), values) 106 } 107 108 type Float32Table struct{ table32 } 109 110 func NewFloat32Table(cap int, maxLoad float64) *Float32Table { 111 return &Float32Table{makeTable32(cap, maxLoad)} 112 } 113 114 func (t *Float32Table) Reset() { t.reset() } 115 116 func (t *Float32Table) Len() int { return t.len } 117 118 func (t *Float32Table) Cap() int { return t.size() } 119 120 func (t *Float32Table) Probe(keys []float32, values []int32) int { 121 return t.probe(unsafecast.Float32ToUint32(keys), values) 122 } 123 124 func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int { 125 return t.probeArray(keys.Uint32Array(), values) 126 } 127 128 type Uint32Table struct{ table32 } 129 130 func NewUint32Table(cap int, maxLoad float64) *Uint32Table { 131 return &Uint32Table{makeTable32(cap, maxLoad)} 132 } 133 134 func (t *Uint32Table) Reset() { t.reset() } 135 136 func (t *Uint32Table) Len() int { return t.len } 137 138 func (t *Uint32Table) Cap() int { return t.size() } 139 140 func (t *Uint32Table) Probe(keys []uint32, values []int32) int { 141 return t.probe(keys, values) 142 } 143 144 func (t *Uint32Table) ProbeArray(keys sparse.Uint32Array, values []int32) int { 145 return t.probeArray(keys, values) 146 } 147 148 // table32 is the generic implementation of probing tables for 32 bit types. 149 // 150 // The table uses the following memory layout: 151 // 152 // [group 0][group 1][...][group N] 153 // 154 // Each group contains up to 7 key/value pairs, and is exactly 64 bytes in size, 155 // which allows it to fit within a single cache line, and ensures that probes 156 // can be performed with a single memory load per key. 157 // 158 // Groups fill up by appending new entries to the keys and values arrays. When a 159 // group is full, the probe checks the next group. 160 // 161 // https://en.wikipedia.org/wiki/Linear_probing 162 type table32 struct { 163 len int 164 maxLen int 165 maxLoad float64 166 seed uintptr 167 table []table32Group 168 } 169 170 const table32GroupSize = 7 171 172 type table32Group struct { 173 keys [table32GroupSize]uint32 174 values [table32GroupSize]uint32 175 bits uint32 176 _ uint32 177 } 178 179 func makeTable32(cap int, maxLoad float64) (t table32) { 180 if maxLoad < 0 || maxLoad > 1 { 181 panic("max load of probing table must be a value between 0 and 1") 182 } 183 if cap < table32GroupSize { 184 cap = table32GroupSize 185 } 186 t.init(cap, maxLoad) 187 return t 188 } 189 190 func (t *table32) size() int { 191 return table32GroupSize * len(t.table) 192 } 193 194 func (t *table32) init(cap int, maxLoad float64) { 195 size, maxLen := tableSizeAndMaxLen(table32GroupSize, cap, maxLoad) 196 *t = table32{ 197 maxLen: maxLen, 198 maxLoad: maxLoad, 199 seed: randSeed(), 200 table: make([]table32Group, size), 201 } 202 } 203 204 func (t *table32) grow(totalValues int) { 205 tmp := table32{} 206 tmp.init(totalValues, t.maxLoad) 207 tmp.len = t.len 208 209 hashes := make([]uintptr, table32GroupSize) 210 modulo := uintptr(len(tmp.table)) - 1 211 212 for i := range t.table { 213 g := &t.table[i] 214 n := bits.OnesCount32(g.bits) 215 216 if aeshash.Enabled() { 217 aeshash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed) 218 } else { 219 wyhash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed) 220 } 221 222 for j, hash := range hashes[:n] { 223 for { 224 group := &tmp.table[hash&modulo] 225 226 if n := bits.OnesCount32(group.bits); n < table32GroupSize { 227 group.bits = (group.bits << 1) | 1 228 group.keys[n] = g.keys[j] 229 group.values[n] = g.values[j] 230 break 231 } 232 233 hash++ 234 } 235 } 236 } 237 238 *t = tmp 239 } 240 241 func (t *table32) reset() { 242 t.len = 0 243 244 for i := range t.table { 245 t.table[i] = table32Group{} 246 } 247 } 248 249 func (t *table32) probe(keys []uint32, values []int32) int { 250 return t.probeArray(sparse.MakeUint32Array(keys), values) 251 } 252 253 func (t *table32) probeArray(keys sparse.Uint32Array, values []int32) int { 254 numKeys := keys.Len() 255 256 if totalValues := t.len + numKeys; totalValues > t.maxLen { 257 t.grow(totalValues) 258 } 259 260 var hashes [probesPerLoop]uintptr 261 var baseLength = t.len 262 var useAesHash = aeshash.Enabled() 263 264 _ = values[:numKeys] 265 266 for i := 0; i < numKeys; { 267 j := len(hashes) + i 268 n := len(hashes) 269 270 if j > numKeys { 271 j = numKeys 272 n = numKeys - i 273 } 274 275 k := keys.Slice(i, j) 276 v := values[i:j:j] 277 h := hashes[:n:n] 278 279 if useAesHash { 280 aeshash.MultiHashUint32Array(h, k, t.seed) 281 } else { 282 wyhash.MultiHashUint32Array(h, k, t.seed) 283 } 284 285 t.len = multiProbe32(t.table, t.len, h, k, v) 286 i = j 287 } 288 289 return t.len - baseLength 290 } 291 292 func multiProbe32Default(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int { 293 modulo := uintptr(len(table)) - 1 294 295 for i, hash := range hashes { 296 key := keys.Index(i) 297 for { 298 group := &table[hash&modulo] 299 index := table32GroupSize 300 value := int32(0) 301 302 for j, k := range group.keys { 303 if k == key { 304 index = j 305 break 306 } 307 } 308 309 if n := bits.OnesCount32(group.bits); index < n { 310 value = int32(group.values[index]) 311 } else { 312 if n == table32GroupSize { 313 hash++ 314 continue 315 } 316 317 value = int32(numKeys) 318 group.bits = (group.bits << 1) | 1 319 group.keys[n] = key 320 group.values[n] = uint32(value) 321 numKeys++ 322 } 323 324 values[i] = value 325 break 326 } 327 } 328 329 return numKeys 330 } 331 332 type Int64Table struct{ table64 } 333 334 func NewInt64Table(cap int, maxLoad float64) *Int64Table { 335 return &Int64Table{makeTable64(cap, maxLoad)} 336 } 337 338 func (t *Int64Table) Reset() { t.reset() } 339 340 func (t *Int64Table) Len() int { return t.len } 341 342 func (t *Int64Table) Cap() int { return t.size() } 343 344 func (t *Int64Table) Probe(keys []int64, values []int32) int { 345 return t.probe(unsafecast.Int64ToUint64(keys), values) 346 } 347 348 func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int { 349 return t.probeArray(keys.Uint64Array(), values) 350 } 351 352 type Float64Table struct{ table64 } 353 354 func NewFloat64Table(cap int, maxLoad float64) *Float64Table { 355 return &Float64Table{makeTable64(cap, maxLoad)} 356 } 357 358 func (t *Float64Table) Reset() { t.reset() } 359 360 func (t *Float64Table) Len() int { return t.len } 361 362 func (t *Float64Table) Cap() int { return t.size() } 363 364 func (t *Float64Table) Probe(keys []float64, values []int32) int { 365 return t.probe(unsafecast.Float64ToUint64(keys), values) 366 } 367 368 func (t *Float64Table) ProbeArray(keys sparse.Float64Array, values []int32) int { 369 return t.probeArray(keys.Uint64Array(), values) 370 } 371 372 type Uint64Table struct{ table64 } 373 374 func NewUint64Table(cap int, maxLoad float64) *Uint64Table { 375 return &Uint64Table{makeTable64(cap, maxLoad)} 376 } 377 378 func (t *Uint64Table) Reset() { t.reset() } 379 380 func (t *Uint64Table) Len() int { return t.len } 381 382 func (t *Uint64Table) Cap() int { return t.size() } 383 384 func (t *Uint64Table) Probe(keys []uint64, values []int32) int { 385 return t.probe(keys, values) 386 } 387 388 func (t *Uint64Table) ProbeArray(keys sparse.Uint64Array, values []int32) int { 389 return t.probeArray(keys, values) 390 } 391 392 // table64 is the generic implementation of probing tables for 64 bit types. 393 // 394 // The table uses a layout similar to the one documented on the table for 32 bit 395 // keys (see table32). Each group holds up to 4 key/value pairs (instead of 7 396 // like table32) so that each group fits in a single CPU cache line. This table 397 // version has a bit lower memory density, with ~23% of table memory being used 398 // for padding. 399 // 400 // Technically we could hold up to 5 entries per group and still fit within the 401 // 64 bytes of a CPU cache line; on x86 platforms, AVX2 registers can only hold 402 // four 64 bit values, we would need twice as many instructions per probe if the 403 // groups were holding 5 values. The trade off of memory for compute efficiency 404 // appeared to be the right choice at the time. 405 type table64 struct { 406 len int 407 maxLen int 408 maxLoad float64 409 seed uintptr 410 table []table64Group 411 } 412 413 const table64GroupSize = 4 414 415 type table64Group struct { 416 keys [table64GroupSize]uint64 417 values [table64GroupSize]uint32 418 bits uint32 419 _ uint32 420 _ uint32 421 _ uint32 422 } 423 424 func makeTable64(cap int, maxLoad float64) (t table64) { 425 if maxLoad < 0 || maxLoad > 1 { 426 panic("max load of probing table must be a value between 0 and 1") 427 } 428 if cap < table64GroupSize { 429 cap = table64GroupSize 430 } 431 t.init(cap, maxLoad) 432 return t 433 } 434 435 func (t *table64) size() int { 436 return table64GroupSize * len(t.table) 437 } 438 439 func (t *table64) init(cap int, maxLoad float64) { 440 size, maxLen := tableSizeAndMaxLen(table64GroupSize, cap, maxLoad) 441 *t = table64{ 442 maxLen: maxLen, 443 maxLoad: maxLoad, 444 seed: randSeed(), 445 table: make([]table64Group, size), 446 } 447 } 448 449 func (t *table64) grow(totalValues int) { 450 tmp := table64{} 451 tmp.init(totalValues, t.maxLoad) 452 tmp.len = t.len 453 454 hashes := make([]uintptr, table64GroupSize) 455 modulo := uintptr(len(tmp.table)) - 1 456 457 for i := range t.table { 458 g := &t.table[i] 459 n := bits.OnesCount32(g.bits) 460 461 if aeshash.Enabled() { 462 aeshash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed) 463 } else { 464 wyhash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed) 465 } 466 467 for j, hash := range hashes[:n] { 468 for { 469 group := &tmp.table[hash&modulo] 470 471 if n := bits.OnesCount32(group.bits); n < table64GroupSize { 472 group.bits = (group.bits << 1) | 1 473 group.keys[n] = g.keys[j] 474 group.values[n] = g.values[j] 475 break 476 } 477 478 hash++ 479 } 480 } 481 } 482 483 *t = tmp 484 } 485 486 func (t *table64) reset() { 487 t.len = 0 488 489 for i := range t.table { 490 t.table[i] = table64Group{} 491 } 492 } 493 494 func (t *table64) probe(keys []uint64, values []int32) int { 495 return t.probeArray(sparse.MakeUint64Array(keys), values) 496 } 497 498 func (t *table64) probeArray(keys sparse.Uint64Array, values []int32) int { 499 numKeys := keys.Len() 500 501 if totalValues := t.len + numKeys; totalValues > t.maxLen { 502 t.grow(totalValues) 503 } 504 505 var hashes [probesPerLoop]uintptr 506 var baseLength = t.len 507 var useAesHash = aeshash.Enabled() 508 509 _ = values[:numKeys] 510 511 for i := 0; i < numKeys; { 512 j := len(hashes) + i 513 n := len(hashes) 514 515 if j > numKeys { 516 j = numKeys 517 n = numKeys - i 518 } 519 520 k := keys.Slice(i, j) 521 v := values[i:j:j] 522 h := hashes[:n:n] 523 524 if useAesHash { 525 aeshash.MultiHashUint64Array(h, k, t.seed) 526 } else { 527 wyhash.MultiHashUint64Array(h, k, t.seed) 528 } 529 530 t.len = multiProbe64(t.table, t.len, h, k, v) 531 i = j 532 } 533 534 return t.len - baseLength 535 } 536 537 func multiProbe64Default(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int { 538 modulo := uintptr(len(table)) - 1 539 540 for i, hash := range hashes { 541 key := keys.Index(i) 542 for { 543 group := &table[hash&modulo] 544 index := table64GroupSize 545 value := int32(0) 546 547 for i, k := range group.keys { 548 if k == key { 549 index = i 550 break 551 } 552 } 553 554 if n := bits.OnesCount32(group.bits); index < n { 555 value = int32(group.values[index]) 556 } else { 557 if n == table64GroupSize { 558 hash++ 559 continue 560 } 561 562 value = int32(numKeys) 563 group.bits = (group.bits << 1) | 1 564 group.keys[n] = key 565 group.values[n] = uint32(value) 566 numKeys++ 567 } 568 569 values[i] = value 570 break 571 } 572 } 573 574 return numKeys 575 } 576 577 type Uint128Table struct{ table128 } 578 579 func NewUint128Table(cap int, maxLoad float64) *Uint128Table { 580 return &Uint128Table{makeTable128(cap, maxLoad)} 581 } 582 583 func (t *Uint128Table) Reset() { t.reset() } 584 585 func (t *Uint128Table) Len() int { return t.len } 586 587 func (t *Uint128Table) Cap() int { return t.cap } 588 589 func (t *Uint128Table) Probe(keys [][16]byte, values []int32) int { 590 return t.probe(keys, values) 591 } 592 593 func (t *Uint128Table) ProbeArray(keys sparse.Uint128Array, values []int32) int { 594 return t.probeArray(keys, values) 595 } 596 597 // table128 is the generic implementation of probing tables for 128 bit types. 598 // 599 // This table uses the following memory layout: 600 // 601 // [key A][key B][...][value A][value B][...] 602 // 603 // The table stores values as their actual value plus one, and uses zero as a 604 // sentinel to determine whether a slot is occupied. A linear probing strategy 605 // is used to resolve conflicts. This approach results in at most two memory 606 // loads for every four keys being tested, since the location of a key and its 607 // corresponding value will not be contiguous on the same CPU cache line, but 608 // a cache line can hold four 16 byte keys. 609 type table128 struct { 610 len int 611 cap int 612 maxLen int 613 maxLoad float64 614 seed uintptr 615 table []byte 616 } 617 618 func makeTable128(cap int, maxLoad float64) (t table128) { 619 if maxLoad < 0 || maxLoad > 1 { 620 panic("max load of probing table must be a value between 0 and 1") 621 } 622 if cap < 8 { 623 cap = 8 624 } 625 t.init(cap, maxLoad) 626 return t 627 } 628 629 func (t *table128) init(cap int, maxLoad float64) { 630 size, maxLen := tableSizeAndMaxLen(1, cap, maxLoad) 631 *t = table128{ 632 cap: size, 633 maxLen: maxLen, 634 maxLoad: maxLoad, 635 seed: randSeed(), 636 table: make([]byte, 16*size+4*size), 637 } 638 } 639 640 func (t *table128) kv() (keys [][16]byte, values []int32) { 641 i := t.cap * 16 642 return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:]) 643 } 644 645 func (t *table128) grow(totalValues int) { 646 tmp := table128{} 647 tmp.init(totalValues, t.maxLoad) 648 tmp.len = t.len 649 650 keys, values := t.kv() 651 hashes := make([]uintptr, probesPerLoop) 652 useAesHash := aeshash.Enabled() 653 654 _ = values[:len(keys)] 655 656 for i := 0; i < len(keys); { 657 j := len(hashes) + i 658 n := len(hashes) 659 660 if j > len(keys) { 661 j = len(keys) 662 n = len(keys) - i 663 } 664 665 h := hashes[:n:n] 666 k := keys[i:j:j] 667 v := values[i:j:j] 668 669 if useAesHash { 670 aeshash.MultiHash128(h, k, tmp.seed) 671 } else { 672 wyhash.MultiHash128(h, k, tmp.seed) 673 } 674 675 tmp.insert(h, k, v) 676 i = j 677 } 678 679 *t = tmp 680 } 681 682 func (t *table128) insert(hashes []uintptr, keys [][16]byte, values []int32) { 683 tableKeys, tableValues := t.kv() 684 modulo := uintptr(t.cap) - 1 685 686 for i, hash := range hashes { 687 for { 688 j := hash & modulo 689 v := tableValues[j] 690 691 if v == 0 { 692 tableKeys[j] = keys[i] 693 tableValues[j] = values[i] 694 break 695 } 696 697 hash++ 698 } 699 } 700 } 701 702 func (t *table128) reset() { 703 t.len = 0 704 705 for i := range t.table { 706 t.table[i] = 0 707 } 708 } 709 710 func (t *table128) probe(keys [][16]byte, values []int32) int { 711 return t.probeArray(sparse.MakeUint128Array(keys), values) 712 } 713 714 func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int { 715 numKeys := keys.Len() 716 717 if totalValues := t.len + numKeys; totalValues > t.maxLen { 718 t.grow(totalValues) 719 } 720 721 var hashes [probesPerLoop]uintptr 722 var baseLength = t.len 723 var useAesHash = aeshash.Enabled() 724 725 _ = values[:numKeys] 726 727 for i := 0; i < numKeys; { 728 j := len(hashes) + i 729 n := len(hashes) 730 731 if j > numKeys { 732 j = numKeys 733 n = numKeys - i 734 } 735 736 k := keys.Slice(i, j) 737 v := values[i:j:j] 738 h := hashes[:n:n] 739 740 if useAesHash { 741 aeshash.MultiHashUint128Array(h, k, t.seed) 742 } else { 743 wyhash.MultiHashUint128Array(h, k, t.seed) 744 } 745 746 t.len = multiProbe128(t.table, t.cap, t.len, h, k, v) 747 i = j 748 } 749 750 return t.len - baseLength 751 } 752 753 func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int { 754 modulo := uintptr(tableCap) - 1 755 offset := uintptr(tableCap) * 16 756 tableKeys := unsafecast.BytesToUint128(table[:offset]) 757 tableValues := unsafecast.BytesToInt32(table[offset:]) 758 759 for i, hash := range hashes { 760 key := keys.Index(i) 761 for { 762 j := hash & modulo 763 v := tableValues[j] 764 765 if v == 0 { 766 values[i] = int32(tableLen) 767 tableLen++ 768 tableKeys[j] = key 769 tableValues[j] = int32(tableLen) 770 break 771 } 772 773 if key == tableKeys[j] { 774 values[i] = v - 1 775 break 776 } 777 778 hash++ 779 } 780 } 781 782 return tableLen 783 }