github.com/stingnevermore/go@v0.0.0-20180120041312-3810f5bfed72/src/runtime/hash_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 "unsafe" 15 ) 16 17 func TestMemHash32Equality(t *testing.T) { 18 if *UseAeshash { 19 t.Skip("skipping since AES hash implementation is used") 20 } 21 var b [4]byte 22 r := rand.New(rand.NewSource(1234)) 23 seed := uintptr(r.Uint64()) 24 for i := 0; i < 100; i++ { 25 randBytes(r, b[:]) 26 got := MemHash32(unsafe.Pointer(&b), seed) 27 want := MemHash(unsafe.Pointer(&b), seed, 4) 28 if got != want { 29 t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want) 30 } 31 } 32 } 33 34 func TestMemHash64Equality(t *testing.T) { 35 if *UseAeshash { 36 t.Skip("skipping since AES hash implementation is used") 37 } 38 var b [8]byte 39 r := rand.New(rand.NewSource(1234)) 40 seed := uintptr(r.Uint64()) 41 for i := 0; i < 100; i++ { 42 randBytes(r, b[:]) 43 got := MemHash64(unsafe.Pointer(&b), seed) 44 want := MemHash(unsafe.Pointer(&b), seed, 8) 45 if got != want { 46 t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want) 47 } 48 } 49 } 50 51 // Smhasher is a torture test for hash functions. 52 // https://code.google.com/p/smhasher/ 53 // This code is a port of some of the Smhasher tests to Go. 54 // 55 // The current AES hash function passes Smhasher. Our fallback 56 // hash functions don't, so we only enable the difficult tests when 57 // we know the AES implementation is available. 58 59 // Sanity checks. 60 // hash should not depend on values outside key. 61 // hash should not depend on alignment. 62 func TestSmhasherSanity(t *testing.T) { 63 r := rand.New(rand.NewSource(1234)) 64 const REP = 10 65 const KEYMAX = 128 66 const PAD = 16 67 const OFFMAX = 16 68 for k := 0; k < REP; k++ { 69 for n := 0; n < KEYMAX; n++ { 70 for i := 0; i < OFFMAX; i++ { 71 var b [KEYMAX + OFFMAX + 2*PAD]byte 72 var c [KEYMAX + OFFMAX + 2*PAD]byte 73 randBytes(r, b[:]) 74 randBytes(r, c[:]) 75 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 76 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 77 t.Errorf("hash depends on bytes outside key") 78 } 79 } 80 } 81 } 82 } 83 84 type HashSet struct { 85 m map[uintptr]struct{} // set of hashes added 86 n int // number of hashes added 87 } 88 89 func newHashSet() *HashSet { 90 return &HashSet{make(map[uintptr]struct{}), 0} 91 } 92 func (s *HashSet) add(h uintptr) { 93 s.m[h] = struct{}{} 94 s.n++ 95 } 96 func (s *HashSet) addS(x string) { 97 s.add(StringHash(x, 0)) 98 } 99 func (s *HashSet) addB(x []byte) { 100 s.add(BytesHash(x, 0)) 101 } 102 func (s *HashSet) addS_seed(x string, seed uintptr) { 103 s.add(StringHash(x, seed)) 104 } 105 func (s *HashSet) check(t *testing.T) { 106 const SLOP = 10.0 107 collisions := s.n - len(s.m) 108 //fmt.Printf("%d/%d\n", len(s.m), s.n) 109 pairs := int64(s.n) * int64(s.n-1) / 2 110 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 111 stddev := math.Sqrt(expected) 112 if float64(collisions) > expected+SLOP*(3*stddev+1) { 113 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 114 } 115 } 116 117 // a string plus adding zeros must make distinct hashes 118 func TestSmhasherAppendedZeros(t *testing.T) { 119 s := "hello" + strings.Repeat("\x00", 256) 120 h := newHashSet() 121 for i := 0; i <= len(s); i++ { 122 h.addS(s[:i]) 123 } 124 h.check(t) 125 } 126 127 // All 0-3 byte strings have distinct hashes. 128 func TestSmhasherSmallKeys(t *testing.T) { 129 h := newHashSet() 130 var b [3]byte 131 for i := 0; i < 256; i++ { 132 b[0] = byte(i) 133 h.addB(b[:1]) 134 for j := 0; j < 256; j++ { 135 b[1] = byte(j) 136 h.addB(b[:2]) 137 if !testing.Short() { 138 for k := 0; k < 256; k++ { 139 b[2] = byte(k) 140 h.addB(b[:3]) 141 } 142 } 143 } 144 } 145 h.check(t) 146 } 147 148 // Different length strings of all zeros have distinct hashes. 149 func TestSmhasherZeros(t *testing.T) { 150 N := 256 * 1024 151 if testing.Short() { 152 N = 1024 153 } 154 h := newHashSet() 155 b := make([]byte, N) 156 for i := 0; i <= N; i++ { 157 h.addB(b[:i]) 158 } 159 h.check(t) 160 } 161 162 // Strings with up to two nonzero bytes all have distinct hashes. 163 func TestSmhasherTwoNonzero(t *testing.T) { 164 if testing.Short() { 165 t.Skip("Skipping in short mode") 166 } 167 h := newHashSet() 168 for n := 2; n <= 16; n++ { 169 twoNonZero(h, n) 170 } 171 h.check(t) 172 } 173 func twoNonZero(h *HashSet, n int) { 174 b := make([]byte, n) 175 176 // all zero 177 h.addB(b[:]) 178 179 // one non-zero byte 180 for i := 0; i < n; i++ { 181 for x := 1; x < 256; x++ { 182 b[i] = byte(x) 183 h.addB(b[:]) 184 b[i] = 0 185 } 186 } 187 188 // two non-zero bytes 189 for i := 0; i < n; i++ { 190 for x := 1; x < 256; x++ { 191 b[i] = byte(x) 192 for j := i + 1; j < n; j++ { 193 for y := 1; y < 256; y++ { 194 b[j] = byte(y) 195 h.addB(b[:]) 196 b[j] = 0 197 } 198 } 199 b[i] = 0 200 } 201 } 202 } 203 204 // Test strings with repeats, like "abcdabcdabcdabcd..." 205 func TestSmhasherCyclic(t *testing.T) { 206 if testing.Short() { 207 t.Skip("Skipping in short mode") 208 } 209 r := rand.New(rand.NewSource(1234)) 210 const REPEAT = 8 211 const N = 1000000 212 for n := 4; n <= 12; n++ { 213 h := newHashSet() 214 b := make([]byte, REPEAT*n) 215 for i := 0; i < N; i++ { 216 b[0] = byte(i * 79 % 97) 217 b[1] = byte(i * 43 % 137) 218 b[2] = byte(i * 151 % 197) 219 b[3] = byte(i * 199 % 251) 220 randBytes(r, b[4:n]) 221 for j := n; j < n*REPEAT; j++ { 222 b[j] = b[j-n] 223 } 224 h.addB(b) 225 } 226 h.check(t) 227 } 228 } 229 230 // Test strings with only a few bits set 231 func TestSmhasherSparse(t *testing.T) { 232 if testing.Short() { 233 t.Skip("Skipping in short mode") 234 } 235 sparse(t, 32, 6) 236 sparse(t, 40, 6) 237 sparse(t, 48, 5) 238 sparse(t, 56, 5) 239 sparse(t, 64, 5) 240 sparse(t, 96, 4) 241 sparse(t, 256, 3) 242 sparse(t, 2048, 2) 243 } 244 func sparse(t *testing.T, n int, k int) { 245 b := make([]byte, n/8) 246 h := newHashSet() 247 setbits(h, b, 0, k) 248 h.check(t) 249 } 250 251 // set up to k bits at index i and greater 252 func setbits(h *HashSet, b []byte, i int, k int) { 253 h.addB(b) 254 if k == 0 { 255 return 256 } 257 for j := i; j < len(b)*8; j++ { 258 b[j/8] |= byte(1 << uint(j&7)) 259 setbits(h, b, j+1, k-1) 260 b[j/8] &= byte(^(1 << uint(j&7))) 261 } 262 } 263 264 // Test all possible combinations of n blocks from the set s. 265 // "permutation" is a bad name here, but it is what Smhasher uses. 266 func TestSmhasherPermutation(t *testing.T) { 267 if testing.Short() { 268 t.Skip("Skipping in short mode") 269 } 270 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 271 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 272 permutation(t, []uint32{0, 1}, 20) 273 permutation(t, []uint32{0, 1 << 31}, 20) 274 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 275 } 276 func permutation(t *testing.T, s []uint32, n int) { 277 b := make([]byte, n*4) 278 h := newHashSet() 279 genPerm(h, b, s, 0) 280 h.check(t) 281 } 282 func genPerm(h *HashSet, b []byte, s []uint32, n int) { 283 h.addB(b[:n]) 284 if n == len(b) { 285 return 286 } 287 for _, v := range s { 288 b[n] = byte(v) 289 b[n+1] = byte(v >> 8) 290 b[n+2] = byte(v >> 16) 291 b[n+3] = byte(v >> 24) 292 genPerm(h, b, s, n+4) 293 } 294 } 295 296 type Key interface { 297 clear() // set bits all to 0 298 random(r *rand.Rand) // set key to something random 299 bits() int // how many bits key has 300 flipBit(i int) // flip bit i of the key 301 hash() uintptr // hash the key 302 name() string // for error reporting 303 } 304 305 type BytesKey struct { 306 b []byte 307 } 308 309 func (k *BytesKey) clear() { 310 for i := range k.b { 311 k.b[i] = 0 312 } 313 } 314 func (k *BytesKey) random(r *rand.Rand) { 315 randBytes(r, k.b) 316 } 317 func (k *BytesKey) bits() int { 318 return len(k.b) * 8 319 } 320 func (k *BytesKey) flipBit(i int) { 321 k.b[i>>3] ^= byte(1 << uint(i&7)) 322 } 323 func (k *BytesKey) hash() uintptr { 324 return BytesHash(k.b, 0) 325 } 326 func (k *BytesKey) name() string { 327 return fmt.Sprintf("bytes%d", len(k.b)) 328 } 329 330 type Int32Key struct { 331 i uint32 332 } 333 334 func (k *Int32Key) clear() { 335 k.i = 0 336 } 337 func (k *Int32Key) random(r *rand.Rand) { 338 k.i = r.Uint32() 339 } 340 func (k *Int32Key) bits() int { 341 return 32 342 } 343 func (k *Int32Key) flipBit(i int) { 344 k.i ^= 1 << uint(i) 345 } 346 func (k *Int32Key) hash() uintptr { 347 return Int32Hash(k.i, 0) 348 } 349 func (k *Int32Key) name() string { 350 return "int32" 351 } 352 353 type Int64Key struct { 354 i uint64 355 } 356 357 func (k *Int64Key) clear() { 358 k.i = 0 359 } 360 func (k *Int64Key) random(r *rand.Rand) { 361 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 362 } 363 func (k *Int64Key) bits() int { 364 return 64 365 } 366 func (k *Int64Key) flipBit(i int) { 367 k.i ^= 1 << uint(i) 368 } 369 func (k *Int64Key) hash() uintptr { 370 return Int64Hash(k.i, 0) 371 } 372 func (k *Int64Key) name() string { 373 return "int64" 374 } 375 376 type EfaceKey struct { 377 i interface{} 378 } 379 380 func (k *EfaceKey) clear() { 381 k.i = nil 382 } 383 func (k *EfaceKey) random(r *rand.Rand) { 384 k.i = uint64(r.Int63()) 385 } 386 func (k *EfaceKey) bits() int { 387 // use 64 bits. This tests inlined interfaces 388 // on 64-bit targets and indirect interfaces on 389 // 32-bit targets. 390 return 64 391 } 392 func (k *EfaceKey) flipBit(i int) { 393 k.i = k.i.(uint64) ^ uint64(1)<<uint(i) 394 } 395 func (k *EfaceKey) hash() uintptr { 396 return EfaceHash(k.i, 0) 397 } 398 func (k *EfaceKey) name() string { 399 return "Eface" 400 } 401 402 type IfaceKey struct { 403 i interface { 404 F() 405 } 406 } 407 type fInter uint64 408 409 func (x fInter) F() { 410 } 411 412 func (k *IfaceKey) clear() { 413 k.i = nil 414 } 415 func (k *IfaceKey) random(r *rand.Rand) { 416 k.i = fInter(r.Int63()) 417 } 418 func (k *IfaceKey) bits() int { 419 // use 64 bits. This tests inlined interfaces 420 // on 64-bit targets and indirect interfaces on 421 // 32-bit targets. 422 return 64 423 } 424 func (k *IfaceKey) flipBit(i int) { 425 k.i = k.i.(fInter) ^ fInter(1)<<uint(i) 426 } 427 func (k *IfaceKey) hash() uintptr { 428 return IfaceHash(k.i, 0) 429 } 430 func (k *IfaceKey) name() string { 431 return "Iface" 432 } 433 434 // Flipping a single bit of a key should flip each output bit with 50% probability. 435 func TestSmhasherAvalanche(t *testing.T) { 436 if testing.Short() { 437 t.Skip("Skipping in short mode") 438 } 439 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 440 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 441 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 442 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 443 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 444 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 445 avalancheTest1(t, &Int32Key{}) 446 avalancheTest1(t, &Int64Key{}) 447 avalancheTest1(t, &EfaceKey{}) 448 avalancheTest1(t, &IfaceKey{}) 449 } 450 func avalancheTest1(t *testing.T, k Key) { 451 const REP = 100000 452 r := rand.New(rand.NewSource(1234)) 453 n := k.bits() 454 455 // grid[i][j] is a count of whether flipping 456 // input bit i affects output bit j. 457 grid := make([][hashSize]int, n) 458 459 for z := 0; z < REP; z++ { 460 // pick a random key, hash it 461 k.random(r) 462 h := k.hash() 463 464 // flip each bit, hash & compare the results 465 for i := 0; i < n; i++ { 466 k.flipBit(i) 467 d := h ^ k.hash() 468 k.flipBit(i) 469 470 // record the effects of that bit flip 471 g := &grid[i] 472 for j := 0; j < hashSize; j++ { 473 g[j] += int(d & 1) 474 d >>= 1 475 } 476 } 477 } 478 479 // Each entry in the grid should be about REP/2. 480 // More precisely, we did N = k.bits() * hashSize experiments where 481 // each is the sum of REP coin flips. We want to find bounds on the 482 // sum of coin flips such that a truly random experiment would have 483 // all sums inside those bounds with 99% probability. 484 N := n * hashSize 485 var c float64 486 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 487 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 488 } 489 c *= 4.0 // allowed slack - we don't need to be perfectly random 490 mean := .5 * REP 491 stddev := .5 * math.Sqrt(REP) 492 low := int(mean - c*stddev) 493 high := int(mean + c*stddev) 494 for i := 0; i < n; i++ { 495 for j := 0; j < hashSize; j++ { 496 x := grid[i][j] 497 if x < low || x > high { 498 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 499 } 500 } 501 } 502 } 503 504 // All bit rotations of a set of distinct keys 505 func TestSmhasherWindowed(t *testing.T) { 506 windowed(t, &Int32Key{}) 507 windowed(t, &Int64Key{}) 508 windowed(t, &BytesKey{make([]byte, 128)}) 509 } 510 func windowed(t *testing.T, k Key) { 511 if testing.Short() { 512 t.Skip("Skipping in short mode") 513 } 514 const BITS = 16 515 516 for r := 0; r < k.bits(); r++ { 517 h := newHashSet() 518 for i := 0; i < 1<<BITS; i++ { 519 k.clear() 520 for j := 0; j < BITS; j++ { 521 if i>>uint(j)&1 != 0 { 522 k.flipBit((j + r) % k.bits()) 523 } 524 } 525 h.add(k.hash()) 526 } 527 h.check(t) 528 } 529 } 530 531 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 532 func TestSmhasherText(t *testing.T) { 533 if testing.Short() { 534 t.Skip("Skipping in short mode") 535 } 536 text(t, "Foo", "Bar") 537 text(t, "FooBar", "") 538 text(t, "", "FooBar") 539 } 540 func text(t *testing.T, prefix, suffix string) { 541 const N = 4 542 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 543 const L = len(S) 544 b := make([]byte, len(prefix)+N+len(suffix)) 545 copy(b, prefix) 546 copy(b[len(prefix)+N:], suffix) 547 h := newHashSet() 548 c := b[len(prefix):] 549 for i := 0; i < L; i++ { 550 c[0] = S[i] 551 for j := 0; j < L; j++ { 552 c[1] = S[j] 553 for k := 0; k < L; k++ { 554 c[2] = S[k] 555 for x := 0; x < L; x++ { 556 c[3] = S[x] 557 h.addB(b) 558 } 559 } 560 } 561 } 562 h.check(t) 563 } 564 565 // Make sure different seed values generate different hashes. 566 func TestSmhasherSeed(t *testing.T) { 567 h := newHashSet() 568 const N = 100000 569 s := "hello" 570 for i := 0; i < N; i++ { 571 h.addS_seed(s, uintptr(i)) 572 } 573 h.check(t) 574 } 575 576 // size of the hash output (32 or 64 bits) 577 const hashSize = 32 + int(^uintptr(0)>>63<<5) 578 579 func randBytes(r *rand.Rand, b []byte) { 580 for i := range b { 581 b[i] = byte(r.Uint32()) 582 } 583 } 584 585 func benchmarkHash(b *testing.B, n int) { 586 s := strings.Repeat("A", n) 587 588 for i := 0; i < b.N; i++ { 589 StringHash(s, 0) 590 } 591 b.SetBytes(int64(n)) 592 } 593 594 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 595 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 596 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 597 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 598 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } 599 600 func TestArrayHash(t *testing.T) { 601 // Make sure that "" in arrays hash correctly. The hash 602 // should at least scramble the input seed so that, e.g., 603 // {"","foo"} and {"foo",""} have different hashes. 604 605 // If the hash is bad, then all (8 choose 4) = 70 keys 606 // have the same hash. If so, we allocate 70/8 = 8 607 // overflow buckets. If the hash is good we don't 608 // normally allocate any overflow buckets, and the 609 // probability of even one or two overflows goes down rapidly. 610 // (There is always 1 allocation of the bucket array. The map 611 // header is allocated on the stack.) 612 f := func() { 613 // Make the key type at most 128 bytes. Otherwise, 614 // we get an allocation per key. 615 type key [8]string 616 m := make(map[key]bool, 70) 617 618 // fill m with keys that have 4 "foo"s and 4 ""s. 619 for i := 0; i < 256; i++ { 620 var k key 621 cnt := 0 622 for j := uint(0); j < 8; j++ { 623 if i>>j&1 != 0 { 624 k[j] = "foo" 625 cnt++ 626 } 627 } 628 if cnt == 4 { 629 m[k] = true 630 } 631 } 632 if len(m) != 70 { 633 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 634 } 635 } 636 if n := testing.AllocsPerRun(10, f); n > 6 { 637 t.Errorf("too many allocs %f - hash not balanced", n) 638 } 639 } 640 func TestStructHash(t *testing.T) { 641 // See the comment in TestArrayHash. 642 f := func() { 643 type key struct { 644 a, b, c, d, e, f, g, h string 645 } 646 m := make(map[key]bool, 70) 647 648 // fill m with keys that have 4 "foo"s and 4 ""s. 649 for i := 0; i < 256; i++ { 650 var k key 651 cnt := 0 652 if i&1 != 0 { 653 k.a = "foo" 654 cnt++ 655 } 656 if i&2 != 0 { 657 k.b = "foo" 658 cnt++ 659 } 660 if i&4 != 0 { 661 k.c = "foo" 662 cnt++ 663 } 664 if i&8 != 0 { 665 k.d = "foo" 666 cnt++ 667 } 668 if i&16 != 0 { 669 k.e = "foo" 670 cnt++ 671 } 672 if i&32 != 0 { 673 k.f = "foo" 674 cnt++ 675 } 676 if i&64 != 0 { 677 k.g = "foo" 678 cnt++ 679 } 680 if i&128 != 0 { 681 k.h = "foo" 682 cnt++ 683 } 684 if cnt == 4 { 685 m[k] = true 686 } 687 } 688 if len(m) != 70 { 689 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 690 } 691 } 692 if n := testing.AllocsPerRun(10, f); n > 6 { 693 t.Errorf("too many allocs %f - hash not balanced", n) 694 } 695 } 696 697 var sink uint64 698 699 func BenchmarkAlignedLoad(b *testing.B) { 700 var buf [16]byte 701 p := unsafe.Pointer(&buf[0]) 702 var s uint64 703 for i := 0; i < b.N; i++ { 704 s += ReadUnaligned64(p) 705 } 706 sink = s 707 } 708 709 func BenchmarkUnalignedLoad(b *testing.B) { 710 var buf [16]byte 711 p := unsafe.Pointer(&buf[1]) 712 var s uint64 713 for i := 0; i < b.N; i++ { 714 s += ReadUnaligned64(p) 715 } 716 sink = s 717 } 718 719 func TestCollisions(t *testing.T) { 720 if testing.Short() { 721 t.Skip("Skipping in short mode") 722 } 723 for i := 0; i < 16; i++ { 724 for j := 0; j < 16; j++ { 725 if j == i { 726 continue 727 } 728 var a [16]byte 729 m := make(map[uint16]struct{}, 1<<16) 730 for n := 0; n < 1<<16; n++ { 731 a[i] = byte(n) 732 a[j] = byte(n >> 8) 733 m[uint16(BytesHash(a[:], 0))] = struct{}{} 734 } 735 if len(m) <= 1<<15 { 736 t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m)) 737 } 738 } 739 } 740 }