github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/hashprobe/hashprobe.go

// Package hashprobe provides implementations of probing tables for various
// data types.
//
// Probing tables are specialized hash tables supporting only a single
// "probing" operation which behaves like a "lookup or insert". When a key
// is probed, either its value is retrieved if it already existed in the table,
// or it is inserted and assigned its index in the insert sequence as value.
//
// Values are represented as signed 32-bit integers, which means that probing
// tables defined in this package may contain at most 2^31-1 entries.
//
// Probing tables have a method named Probe with the following signature:
//
//	func (t *Int64Table) Probe(keys []int64, values []int32) int {
//		...
//	}
//
// The method takes an array of keys to probe as first argument, an array of
// values where the index of each key will be written as second argument, and
// returns the number of keys that were inserted during the call.
//
// Applications that need to determine which keys were inserted can capture the
// length of the probing table prior to the call, and scan the list of values
// looking for indexes greater than or equal to the length of the table before
// the call.
package hashprobe

import (
	cryptoRand "crypto/rand"
	"encoding/binary"
	"math"
	"math/bits"
	"math/rand"
	"sync"

	"github.com/vc42/parquet-go/hashprobe/aeshash"
	"github.com/vc42/parquet-go/hashprobe/wyhash"
	"github.com/vc42/parquet-go/internal/unsafecast"
	"github.com/vc42/parquet-go/sparse"
)

const (
	// Number of probes tested per iteration. This parameter balances between
	// the amount of memory allocated on the stack to hold the computed hashes
	// of the keys being probed, and amortizing the baseline cost of the probing
	// algorithm.
	//
	// The larger the value, the more memory is required, but the lower the
	// baseline cost will be.
	//
	// We chose a value that is somewhat large, resulting in reserving 2KiB of
	// stack (256 hashes of 8 bytes each on 64-bit architectures) but mostly
	// erasing the baseline cost.
	probesPerLoop = 256
)
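
// probeExample is a minimal sketch, not part of the package API, illustrating
// the contract described in the package documentation: capture the table
// length before probing, then treat any value greater than or equal to that
// length as a newly inserted key. The 0.9 max load factor is an arbitrary
// choice for the example.
func probeExample(keys []int64) (inserted []int64) {
	table := NewInt64Table(len(keys), 0.9)
	values := make([]int32, len(keys))

	lengthBefore := int32(table.Len())
	table.Probe(keys, values)

	for i, v := range values {
		// Indexes assigned during this call are all >= the previous length.
		if v >= lengthBefore {
			inserted = append(inserted, keys[i])
		}
	}
	return inserted
}
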
var (
	prngSeed   [8]byte
	prngMutex  sync.Mutex
	prngSource rand.Source64
)

func init() {
	_, err := cryptoRand.Read(prngSeed[:])
	if err != nil {
		panic("cannot seed random number generator from system source: " + err.Error())
	}
	seed := int64(binary.LittleEndian.Uint64(prngSeed[:]))
	prngSource = rand.NewSource(seed).(rand.Source64)
}

// nextPowerOf2 returns the smallest power of two greater than or equal to n.
func nextPowerOf2(n int) int {
	return 1 << (64 - bits.LeadingZeros64(uint64(n-1)))
}

func randSeed() uintptr {
	prngMutex.Lock()
	defer prngMutex.Unlock()
	return uintptr(prngSource.Uint64())
}

type Int32Table struct{ table32 }

func NewInt32Table(cap int, maxLoad float64) *Int32Table {
	return &Int32Table{makeTable32(cap, maxLoad)}
}

func (t *Int32Table) Reset() { t.reset() }

func (t *Int32Table) Len() int { return t.len }

func (t *Int32Table) Cap() int { return t.size() }

func (t *Int32Table) Probe(keys, values []int32) int {
	return t.probe(unsafecast.Int32ToUint32(keys), values)
}

func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int {
	return t.probeArray(keys.Uint32Array(), values)
}

type Float32Table struct{ table32 }

func NewFloat32Table(cap int, maxLoad float64) *Float32Table {
	return &Float32Table{makeTable32(cap, maxLoad)}
}

func (t *Float32Table) Reset() { t.reset() }

func (t *Float32Table) Len() int { return t.len }

func (t *Float32Table) Cap() int { return t.size() }

func (t *Float32Table) Probe(keys []float32, values []int32) int {
	return t.probe(unsafecast.Float32ToUint32(keys), values)
}

func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int {
	return t.probeArray(keys.Uint32Array(), values)
}

type Uint32Table struct{ table32 }

func NewUint32Table(cap int, maxLoad float64) *Uint32Table {
	return &Uint32Table{makeTable32(cap, maxLoad)}
}

func (t *Uint32Table) Reset() { t.reset() }

func (t *Uint32Table) Len() int { return t.len }

func (t *Uint32Table) Cap() int { return t.size() }

func (t *Uint32Table) Probe(keys []uint32, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint32Table) ProbeArray(keys sparse.Uint32Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table32 is the generic implementation of probing tables for 32-bit types.
//
// The table uses the following memory layout:
//
//	[group 0][group 1][...][group N]
//
// Each group contains up to 7 key/value pairs and is exactly 64 bytes in
// size, which allows it to fit within a single cache line and ensures that
// probes can be performed with a single memory load per key.
//
// Groups fill up by appending new entries to the keys and values arrays.
// When a group is full, the probe moves on to the next group, following a
// linear probing strategy:
//
// https://en.wikipedia.org/wiki/Linear_probing
type table32 struct {
	len     int
	maxLen  int
	maxLoad float64
	seed    uintptr
	table   []table32Group
}

const table32GroupSize = 7

type table32Group struct {
	keys   [table32GroupSize]uint32
	values [table32GroupSize]uint32
	bits   uint32
	_      uint32
}
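
// The bits field of a table32Group is a unary occupancy counter: every
// insertion shifts in a new low bit (bits = bits<<1 | 1), so the population
// count of the field is the number of slots in use. A minimal sketch, not
// used by the implementation below:
//
//	func groupLen(g *table32Group) int {
//		return bits.OnesCount32(g.bits) // 0b000 -> 0 entries, 0b111 -> 3 entries
//	}
//
// The layout adds up to the 64 bytes stated above: 7 keys of 4 bytes +
// 7 values of 4 bytes + 4 bytes of occupancy bits + 4 bytes of padding.
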
func makeTable32(cap int, maxLoad float64) (t table32) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < table32GroupSize {
		cap = table32GroupSize
	}
	t.init(cap, maxLoad)
	return t
}

func (t *table32) size() int {
	return table32GroupSize * len(t.table)
}

func (t *table32) init(cap int, maxLoad float64) {
	// m is the number of slots needed to hold cap entries at the configured
	// maximum load factor; n rounds the group count up to a power of two so
	// that the modulo in the probe can be computed with a bit mask.
	m := int(math.Ceil((1 / maxLoad) * float64(cap)))
	n := nextPowerOf2((m + (table32GroupSize - 1)) / table32GroupSize)
	*t = table32{
		maxLen:  int(math.Ceil(maxLoad * float64(table32GroupSize*n))),
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]table32Group, n),
	}
}
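
// As a worked example of the sizing above (illustrative numbers): asking for
// cap=100 and maxLoad=0.9 gives m = ceil(100/0.9) = 112 slots, which needs
// (112+6)/7 = 16 groups; 16 is already a power of two, so the table allocates
// 16 groups (112 slots) and sets maxLen = ceil(0.9*112) = 101, the number of
// entries beyond which the table grows.
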
func (t *table32) grow(totalValues int) {
	tmp := table32{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	hashes := make([]uintptr, table32GroupSize)
	modulo := uintptr(len(tmp.table)) - 1

	for i := range t.table {
		g := &t.table[i]
		n := bits.OnesCount32(g.bits)

		if aeshash.Enabled() {
			aeshash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed)
		} else {
			wyhash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed)
		}

		for j, hash := range hashes[:n] {
			for {
				group := &tmp.table[hash&modulo]

				if n := bits.OnesCount32(group.bits); n < table32GroupSize {
					group.bits = (group.bits << 1) | 1
					group.keys[n] = g.keys[j]
					group.values[n] = g.values[j]
					break
				}

				hash++
			}
		}
	}

	*t = tmp
}

func (t *table32) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = table32Group{}
	}
}

func (t *table32) probe(keys []uint32, values []int32) int {
	return t.probeArray(sparse.MakeUint32Array(keys), values)
}

func (t *table32) probeArray(keys sparse.Uint32Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	// Asserting the length of values up front panics before any mutation if
	// the slice is too short, and hints the compiler that bounds checks can
	// be elided in the loop below.
	_ = values[:numKeys]

	// Hash and probe the keys in chunks of at most probesPerLoop entries.
	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint32Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint32Array(h, k, t.seed)
		}

		t.len = multiProbe32(t.table, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}

func multiProbe32Default(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int {
	modulo := uintptr(len(table)) - 1

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			group := &table[hash&modulo]
			index := table32GroupSize
			value := int32(0)

			// Look for the key among the 7 slots of the group.
			for j, k := range group.keys {
				if k == key {
					index = j
					break
				}
			}

			if n := bits.OnesCount32(group.bits); index < n {
				// The key was found in an occupied slot: reuse its value.
				value = int32(group.values[index])
			} else {
				if n == table32GroupSize {
					// The group is full: move on to the next one.
					hash++
					continue
				}

				// Insert the key in the first free slot of the group.
				value = int32(numKeys)
				group.bits = (group.bits << 1) | 1
				group.keys[n] = key
				group.values[n] = uint32(value)
				numKeys++
			}

			values[i] = value
			break
		}
	}

	return numKeys
}

type Int64Table struct{ table64 }

func NewInt64Table(cap int, maxLoad float64) *Int64Table {
	return &Int64Table{makeTable64(cap, maxLoad)}
}

func (t *Int64Table) Reset() { t.reset() }

func (t *Int64Table) Len() int { return t.len }

func (t *Int64Table) Cap() int { return t.size() }

func (t *Int64Table) Probe(keys []int64, values []int32) int {
	return t.probe(unsafecast.Int64ToUint64(keys), values)
}

func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int {
	return t.probeArray(keys.Uint64Array(), values)
}

type Float64Table struct{ table64 }

func NewFloat64Table(cap int, maxLoad float64) *Float64Table {
	return &Float64Table{makeTable64(cap, maxLoad)}
}

func (t *Float64Table) Reset() { t.reset() }

func (t *Float64Table) Len() int { return t.len }

func (t *Float64Table) Cap() int { return t.size() }

func (t *Float64Table) Probe(keys []float64, values []int32) int {
	return t.probe(unsafecast.Float64ToUint64(keys), values)
}

func (t *Float64Table) ProbeArray(keys sparse.Float64Array, values []int32) int {
	return t.probeArray(keys.Uint64Array(), values)
}

type Uint64Table struct{ table64 }

func NewUint64Table(cap int, maxLoad float64) *Uint64Table {
	return &Uint64Table{makeTable64(cap, maxLoad)}
}

func (t *Uint64Table) Reset() { t.reset() }

func (t *Uint64Table) Len() int { return t.len }

func (t *Uint64Table) Cap() int { return t.size() }

func (t *Uint64Table) Probe(keys []uint64, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint64Table) ProbeArray(keys sparse.Uint64Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table64 is the generic implementation of probing tables for 64-bit types.
//
// The table uses a layout similar to the one documented on the table for
// 32-bit keys (see table32). Each group holds up to 4 key/value pairs
// (instead of 7 like table32) so that each group fits in a single CPU cache
// line. This table version has a somewhat lower memory density, with ~23% of
// the table memory being used for padding.
//
// Technically we could hold up to 5 entries per group and still fit within
// the 64 bytes of a CPU cache line; however, on x86 platforms AVX2 registers
// can only hold four 64-bit values, so we would need twice as many
// instructions per probe if the groups were holding 5 values. The trade-off
// of memory for compute efficiency appeared to be the right choice at the
// time.
type table64 struct {
	len     int
	maxLen  int
	maxLoad float64
	seed    uintptr
	table   []table64Group
}

const table64GroupSize = 4

type table64Group struct {
	keys   [table64GroupSize]uint64
	values [table64GroupSize]uint32
	bits   uint32
	_      uint32
	_      uint32
	_      uint32
}
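
// The table64Group layout accounts for its cache line as follows: 4 keys of
// 8 bytes + 4 values of 4 bytes + 4 bytes of occupancy bits + 12 bytes of
// padding (the three blank uint32 fields) = 64 bytes. The 12 bytes of padding
// over the 52 bytes of used memory matches the ~23% figure quoted above
// (12/52 is roughly 23%).
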
func makeTable64(cap int, maxLoad float64) (t table64) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < table64GroupSize {
		cap = table64GroupSize
	}
	t.init(cap, maxLoad)
	return t
}

func (t *table64) size() int {
	return table64GroupSize * len(t.table)
}

func (t *table64) init(cap int, maxLoad float64) {
	m := int(math.Ceil((1 / maxLoad) * float64(cap)))
	n := nextPowerOf2((m + (table64GroupSize - 1)) / table64GroupSize)
	*t = table64{
		maxLen:  int(math.Ceil(maxLoad * float64(table64GroupSize*n))),
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]table64Group, n),
	}
}

func (t *table64) grow(totalValues int) {
	tmp := table64{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	hashes := make([]uintptr, table64GroupSize)
	modulo := uintptr(len(tmp.table)) - 1

	for i := range t.table {
		g := &t.table[i]
		n := bits.OnesCount32(g.bits)

		if aeshash.Enabled() {
			aeshash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed)
		} else {
			wyhash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed)
		}

		for j, hash := range hashes[:n] {
			for {
				group := &tmp.table[hash&modulo]

				if n := bits.OnesCount32(group.bits); n < table64GroupSize {
					group.bits = (group.bits << 1) | 1
					group.keys[n] = g.keys[j]
					group.values[n] = g.values[j]
					break
				}

				hash++
			}
		}
	}

	*t = tmp
}

func (t *table64) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = table64Group{}
	}
}

func (t *table64) probe(keys []uint64, values []int32) int {
	return t.probeArray(sparse.MakeUint64Array(keys), values)
}

func (t *table64) probeArray(keys sparse.Uint64Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	_ = values[:numKeys]

	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint64Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint64Array(h, k, t.seed)
		}

		t.len = multiProbe64(t.table, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}
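
// Duplicate keys within a single batch all resolve to the same index, and
// only the first occurrence counts as an insertion. A minimal sketch with
// illustrative values, assuming a freshly created table:
//
//	t := NewUint64Table(0, 0.9)
//	values := make([]int32, 4)
//	n := t.Probe([]uint64{7, 7, 9, 7}, values)
//	// values == [0, 0, 1, 0] and n == 2: two distinct keys were inserted.
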
func multiProbe64Default(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int {
	modulo := uintptr(len(table)) - 1

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			group := &table[hash&modulo]
			index := table64GroupSize
			value := int32(0)

			for j, k := range group.keys {
				if k == key {
					index = j
					break
				}
			}

			if n := bits.OnesCount32(group.bits); index < n {
				value = int32(group.values[index])
			} else {
				if n == table64GroupSize {
					hash++
					continue
				}

				value = int32(numKeys)
				group.bits = (group.bits << 1) | 1
				group.keys[n] = key
				group.values[n] = uint32(value)
				numKeys++
			}

			values[i] = value
			break
		}
	}

	return numKeys
}

type Uint128Table struct{ table128 }

func NewUint128Table(cap int, maxLoad float64) *Uint128Table {
	return &Uint128Table{makeTable128(cap, maxLoad)}
}

func (t *Uint128Table) Reset() { t.reset() }

func (t *Uint128Table) Len() int { return t.len }

func (t *Uint128Table) Cap() int { return t.cap }

func (t *Uint128Table) Probe(keys [][16]byte, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint128Table) ProbeArray(keys sparse.Uint128Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table128 is the generic implementation of probing tables for 128-bit types.
//
// This table uses the following memory layout:
//
//	[key A][key B][...][value A][value B][...]
//
// The table stores each value as its actual value plus one, and uses zero as
// a sentinel to determine whether a slot is occupied. A linear probing
// strategy is used to resolve conflicts. Since a key and its corresponding
// value are not contiguous, they never share a CPU cache line, but a cache
// line can hold four 16-byte keys; this approach results in at most two
// memory loads for every four keys tested.
type table128 struct {
	len     int
	cap     int
	maxLen  int
	maxLoad float64
	seed    uintptr
	table   []byte
}
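
// lookup128 is a minimal sketch, not used by the implementation below, of the
// sentinel scheme described above: slots store index+1 so that zero marks an
// empty slot, and collisions advance linearly to the next slot.
func lookup128(t *table128, key [16]byte, hash uintptr) (value int32, found bool) {
	keys, values := t.kv()
	modulo := uintptr(t.cap) - 1

	for {
		j := hash & modulo

		switch {
		case values[j] == 0:
			return 0, false // zero sentinel: the slot is empty, the key is absent
		case keys[j] == key:
			return values[j] - 1, true // undo the +1 applied on insertion
		}

		hash++
	}
}
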
func makeTable128(cap int, maxLoad float64) (t table128) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < 8 {
		cap = 8
	}
	t.init(cap, maxLoad)
	return t
}

func (t *table128) init(cap int, maxLoad float64) {
	m := int(math.Ceil((1 / maxLoad) * float64(cap)))
	n := nextPowerOf2(m)
	*t = table128{
		cap:     n,
		maxLen:  int(math.Ceil(maxLoad * float64(n))),
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]byte, 16*n+4*n),
	}
}

// kv returns views of the table buffer as the array of keys and the array of
// values described in the type's documentation.
func (t *table128) kv() (keys [][16]byte, values []int32) {
	i := t.cap * 16
	return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:])
}

func (t *table128) grow(totalValues int) {
	tmp := table128{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	keys, values := t.kv()
	hashes := make([]uintptr, probesPerLoop)
	useAesHash := aeshash.Enabled()

	_ = values[:len(keys)]

	for i := 0; i < len(keys); {
		j := len(hashes) + i
		n := len(hashes)

		if j > len(keys) {
			j = len(keys)
			n = len(keys) - i
		}

		h := hashes[:n:n]
		k := keys[i:j:j]
		v := values[i:j:j]

		if useAesHash {
			aeshash.MultiHash128(h, k, tmp.seed)
		} else {
			wyhash.MultiHash128(h, k, tmp.seed)
		}

		tmp.insert(h, k, v)
		i = j
	}

	*t = tmp
}

func (t *table128) insert(hashes []uintptr, keys [][16]byte, values []int32) {
	tableKeys, tableValues := t.kv()
	modulo := uintptr(t.cap) - 1

	for i, hash := range hashes {
		for {
			j := hash & modulo
			v := tableValues[j]

			if v == 0 {
				tableKeys[j] = keys[i]
				tableValues[j] = values[i]
				break
			}

			hash++
		}
	}
}

func (t *table128) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = 0
	}
}

func (t *table128) probe(keys [][16]byte, values []int32) int {
	return t.probeArray(sparse.MakeUint128Array(keys), values)
}

func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	_ = values[:numKeys]

	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint128Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint128Array(h, k, t.seed)
		}

		t.len = multiProbe128(t.table, t.cap, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}

func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int {
	modulo := uintptr(tableCap) - 1
	offset := uintptr(tableCap) * 16
	tableKeys := unsafecast.BytesToUint128(table[:offset])
	tableValues := unsafecast.BytesToInt32(table[offset:])

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			j := hash & modulo
			v := tableValues[j]

			if v == 0 {
				// Empty slot: insert the key; the stored value is offset by
				// one so that zero can keep acting as the empty sentinel.
				values[i] = int32(tableLen)
				tableLen++
				tableKeys[j] = key
				tableValues[j] = int32(tableLen)
				break
			}

			if key == tableKeys[j] {
				values[i] = v - 1
				break
			}

			hash++
		}
	}

	return tableLen
}
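
// probe128Example is a minimal sketch, not part of the package API, showing a
// typical use of Uint128Table: dictionary-encoding fixed 16-byte values such
// as UUIDs. The 0.9 max load factor is an arbitrary choice for the example.
func probe128Example(uuids [][16]byte) []int32 {
	table := NewUint128Table(len(uuids), 0.9)
	indexes := make([]int32, len(uuids))
	table.Probe(uuids, indexes)
	return indexes
}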