github.com/panjjo/go@v0.0.0-20161104043856-d62b31386338/src/runtime/hash_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 "unsafe" 15 ) 16 17 // Smhasher is a torture test for hash functions. 18 // https://code.google.com/p/smhasher/ 19 // This code is a port of some of the Smhasher tests to Go. 20 // 21 // The current AES hash function passes Smhasher. Our fallback 22 // hash functions don't, so we only enable the difficult tests when 23 // we know the AES implementation is available. 24 25 // Sanity checks. 26 // hash should not depend on values outside key. 27 // hash should not depend on alignment. 28 func TestSmhasherSanity(t *testing.T) { 29 r := rand.New(rand.NewSource(1234)) 30 const REP = 10 31 const KEYMAX = 128 32 const PAD = 16 33 const OFFMAX = 16 34 for k := 0; k < REP; k++ { 35 for n := 0; n < KEYMAX; n++ { 36 for i := 0; i < OFFMAX; i++ { 37 var b [KEYMAX + OFFMAX + 2*PAD]byte 38 var c [KEYMAX + OFFMAX + 2*PAD]byte 39 randBytes(r, b[:]) 40 randBytes(r, c[:]) 41 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 42 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 43 t.Errorf("hash depends on bytes outside key") 44 } 45 } 46 } 47 } 48 } 49 50 type HashSet struct { 51 m map[uintptr]struct{} // set of hashes added 52 n int // number of hashes added 53 } 54 55 func newHashSet() *HashSet { 56 return &HashSet{make(map[uintptr]struct{}), 0} 57 } 58 func (s *HashSet) add(h uintptr) { 59 s.m[h] = struct{}{} 60 s.n++ 61 } 62 func (s *HashSet) addS(x string) { 63 s.add(StringHash(x, 0)) 64 } 65 func (s *HashSet) addB(x []byte) { 66 s.add(BytesHash(x, 0)) 67 } 68 func (s *HashSet) addS_seed(x string, seed uintptr) { 69 s.add(StringHash(x, seed)) 70 } 71 func (s *HashSet) check(t *testing.T) { 72 const SLOP = 10.0 73 collisions := s.n - len(s.m) 74 //fmt.Printf("%d/%d\n", len(s.m), s.n) 75 pairs := int64(s.n) * int64(s.n-1) / 2 76 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 77 stddev := math.Sqrt(expected) 78 if float64(collisions) > expected+SLOP*(3*stddev+1) { 79 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 80 } 81 } 82 83 // a string plus adding zeros must make distinct hashes 84 func TestSmhasherAppendedZeros(t *testing.T) { 85 s := "hello" + strings.Repeat("\x00", 256) 86 h := newHashSet() 87 for i := 0; i <= len(s); i++ { 88 h.addS(s[:i]) 89 } 90 h.check(t) 91 } 92 93 // All 0-3 byte strings have distinct hashes. 94 func TestSmhasherSmallKeys(t *testing.T) { 95 h := newHashSet() 96 var b [3]byte 97 for i := 0; i < 256; i++ { 98 b[0] = byte(i) 99 h.addB(b[:1]) 100 for j := 0; j < 256; j++ { 101 b[1] = byte(j) 102 h.addB(b[:2]) 103 if !testing.Short() { 104 for k := 0; k < 256; k++ { 105 b[2] = byte(k) 106 h.addB(b[:3]) 107 } 108 } 109 } 110 } 111 h.check(t) 112 } 113 114 // Different length strings of all zeros have distinct hashes. 115 func TestSmhasherZeros(t *testing.T) { 116 N := 256 * 1024 117 if testing.Short() { 118 N = 1024 119 } 120 h := newHashSet() 121 b := make([]byte, N) 122 for i := 0; i <= N; i++ { 123 h.addB(b[:i]) 124 } 125 h.check(t) 126 } 127 128 // Strings with up to two nonzero bytes all have distinct hashes. 129 func TestSmhasherTwoNonzero(t *testing.T) { 130 if testing.Short() { 131 t.Skip("Skipping in short mode") 132 } 133 h := newHashSet() 134 for n := 2; n <= 16; n++ { 135 twoNonZero(h, n) 136 } 137 h.check(t) 138 } 139 func twoNonZero(h *HashSet, n int) { 140 b := make([]byte, n) 141 142 // all zero 143 h.addB(b[:]) 144 145 // one non-zero byte 146 for i := 0; i < n; i++ { 147 for x := 1; x < 256; x++ { 148 b[i] = byte(x) 149 h.addB(b[:]) 150 b[i] = 0 151 } 152 } 153 154 // two non-zero bytes 155 for i := 0; i < n; i++ { 156 for x := 1; x < 256; x++ { 157 b[i] = byte(x) 158 for j := i + 1; j < n; j++ { 159 for y := 1; y < 256; y++ { 160 b[j] = byte(y) 161 h.addB(b[:]) 162 b[j] = 0 163 } 164 } 165 b[i] = 0 166 } 167 } 168 } 169 170 // Test strings with repeats, like "abcdabcdabcdabcd..." 171 func TestSmhasherCyclic(t *testing.T) { 172 if testing.Short() { 173 t.Skip("Skipping in short mode") 174 } 175 r := rand.New(rand.NewSource(1234)) 176 const REPEAT = 8 177 const N = 1000000 178 for n := 4; n <= 12; n++ { 179 h := newHashSet() 180 b := make([]byte, REPEAT*n) 181 for i := 0; i < N; i++ { 182 b[0] = byte(i * 79 % 97) 183 b[1] = byte(i * 43 % 137) 184 b[2] = byte(i * 151 % 197) 185 b[3] = byte(i * 199 % 251) 186 randBytes(r, b[4:n]) 187 for j := n; j < n*REPEAT; j++ { 188 b[j] = b[j-n] 189 } 190 h.addB(b) 191 } 192 h.check(t) 193 } 194 } 195 196 // Test strings with only a few bits set 197 func TestSmhasherSparse(t *testing.T) { 198 if testing.Short() { 199 t.Skip("Skipping in short mode") 200 } 201 sparse(t, 32, 6) 202 sparse(t, 40, 6) 203 sparse(t, 48, 5) 204 sparse(t, 56, 5) 205 sparse(t, 64, 5) 206 sparse(t, 96, 4) 207 sparse(t, 256, 3) 208 sparse(t, 2048, 2) 209 } 210 func sparse(t *testing.T, n int, k int) { 211 b := make([]byte, n/8) 212 h := newHashSet() 213 setbits(h, b, 0, k) 214 h.check(t) 215 } 216 217 // set up to k bits at index i and greater 218 func setbits(h *HashSet, b []byte, i int, k int) { 219 h.addB(b) 220 if k == 0 { 221 return 222 } 223 for j := i; j < len(b)*8; j++ { 224 b[j/8] |= byte(1 << uint(j&7)) 225 setbits(h, b, j+1, k-1) 226 b[j/8] &= byte(^(1 << uint(j&7))) 227 } 228 } 229 230 // Test all possible combinations of n blocks from the set s. 231 // "permutation" is a bad name here, but it is what Smhasher uses. 232 func TestSmhasherPermutation(t *testing.T) { 233 if testing.Short() { 234 t.Skip("Skipping in short mode") 235 } 236 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 237 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 238 permutation(t, []uint32{0, 1}, 20) 239 permutation(t, []uint32{0, 1 << 31}, 20) 240 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 241 } 242 func permutation(t *testing.T, s []uint32, n int) { 243 b := make([]byte, n*4) 244 h := newHashSet() 245 genPerm(h, b, s, 0) 246 h.check(t) 247 } 248 func genPerm(h *HashSet, b []byte, s []uint32, n int) { 249 h.addB(b[:n]) 250 if n == len(b) { 251 return 252 } 253 for _, v := range s { 254 b[n] = byte(v) 255 b[n+1] = byte(v >> 8) 256 b[n+2] = byte(v >> 16) 257 b[n+3] = byte(v >> 24) 258 genPerm(h, b, s, n+4) 259 } 260 } 261 262 type Key interface { 263 clear() // set bits all to 0 264 random(r *rand.Rand) // set key to something random 265 bits() int // how many bits key has 266 flipBit(i int) // flip bit i of the key 267 hash() uintptr // hash the key 268 name() string // for error reporting 269 } 270 271 type BytesKey struct { 272 b []byte 273 } 274 275 func (k *BytesKey) clear() { 276 for i := range k.b { 277 k.b[i] = 0 278 } 279 } 280 func (k *BytesKey) random(r *rand.Rand) { 281 randBytes(r, k.b) 282 } 283 func (k *BytesKey) bits() int { 284 return len(k.b) * 8 285 } 286 func (k *BytesKey) flipBit(i int) { 287 k.b[i>>3] ^= byte(1 << uint(i&7)) 288 } 289 func (k *BytesKey) hash() uintptr { 290 return BytesHash(k.b, 0) 291 } 292 func (k *BytesKey) name() string { 293 return fmt.Sprintf("bytes%d", len(k.b)) 294 } 295 296 type Int32Key struct { 297 i uint32 298 } 299 300 func (k *Int32Key) clear() { 301 k.i = 0 302 } 303 func (k *Int32Key) random(r *rand.Rand) { 304 k.i = r.Uint32() 305 } 306 func (k *Int32Key) bits() int { 307 return 32 308 } 309 func (k *Int32Key) flipBit(i int) { 310 k.i ^= 1 << uint(i) 311 } 312 func (k *Int32Key) hash() uintptr { 313 return Int32Hash(k.i, 0) 314 } 315 func (k *Int32Key) name() string { 316 return "int32" 317 } 318 319 type Int64Key struct { 320 i uint64 321 } 322 323 func (k *Int64Key) clear() { 324 k.i = 0 325 } 326 func (k *Int64Key) random(r *rand.Rand) { 327 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 328 } 329 func (k *Int64Key) bits() int { 330 return 64 331 } 332 func (k *Int64Key) flipBit(i int) { 333 k.i ^= 1 << uint(i) 334 } 335 func (k *Int64Key) hash() uintptr { 336 return Int64Hash(k.i, 0) 337 } 338 func (k *Int64Key) name() string { 339 return "int64" 340 } 341 342 type EfaceKey struct { 343 i interface{} 344 } 345 346 func (k *EfaceKey) clear() { 347 k.i = nil 348 } 349 func (k *EfaceKey) random(r *rand.Rand) { 350 k.i = uint64(r.Int63()) 351 } 352 func (k *EfaceKey) bits() int { 353 // use 64 bits. This tests inlined interfaces 354 // on 64-bit targets and indirect interfaces on 355 // 32-bit targets. 356 return 64 357 } 358 func (k *EfaceKey) flipBit(i int) { 359 k.i = k.i.(uint64) ^ uint64(1)<<uint(i) 360 } 361 func (k *EfaceKey) hash() uintptr { 362 return EfaceHash(k.i, 0) 363 } 364 func (k *EfaceKey) name() string { 365 return "Eface" 366 } 367 368 type IfaceKey struct { 369 i interface { 370 F() 371 } 372 } 373 type fInter uint64 374 375 func (x fInter) F() { 376 } 377 378 func (k *IfaceKey) clear() { 379 k.i = nil 380 } 381 func (k *IfaceKey) random(r *rand.Rand) { 382 k.i = fInter(r.Int63()) 383 } 384 func (k *IfaceKey) bits() int { 385 // use 64 bits. This tests inlined interfaces 386 // on 64-bit targets and indirect interfaces on 387 // 32-bit targets. 388 return 64 389 } 390 func (k *IfaceKey) flipBit(i int) { 391 k.i = k.i.(fInter) ^ fInter(1)<<uint(i) 392 } 393 func (k *IfaceKey) hash() uintptr { 394 return IfaceHash(k.i, 0) 395 } 396 func (k *IfaceKey) name() string { 397 return "Iface" 398 } 399 400 // Flipping a single bit of a key should flip each output bit with 50% probability. 401 func TestSmhasherAvalanche(t *testing.T) { 402 if testing.Short() { 403 t.Skip("Skipping in short mode") 404 } 405 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 406 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 407 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 408 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 409 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 410 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 411 avalancheTest1(t, &Int32Key{}) 412 avalancheTest1(t, &Int64Key{}) 413 avalancheTest1(t, &EfaceKey{}) 414 avalancheTest1(t, &IfaceKey{}) 415 } 416 func avalancheTest1(t *testing.T, k Key) { 417 const REP = 100000 418 r := rand.New(rand.NewSource(1234)) 419 n := k.bits() 420 421 // grid[i][j] is a count of whether flipping 422 // input bit i affects output bit j. 423 grid := make([][hashSize]int, n) 424 425 for z := 0; z < REP; z++ { 426 // pick a random key, hash it 427 k.random(r) 428 h := k.hash() 429 430 // flip each bit, hash & compare the results 431 for i := 0; i < n; i++ { 432 k.flipBit(i) 433 d := h ^ k.hash() 434 k.flipBit(i) 435 436 // record the effects of that bit flip 437 g := &grid[i] 438 for j := 0; j < hashSize; j++ { 439 g[j] += int(d & 1) 440 d >>= 1 441 } 442 } 443 } 444 445 // Each entry in the grid should be about REP/2. 446 // More precisely, we did N = k.bits() * hashSize experiments where 447 // each is the sum of REP coin flips. We want to find bounds on the 448 // sum of coin flips such that a truly random experiment would have 449 // all sums inside those bounds with 99% probability. 450 N := n * hashSize 451 var c float64 452 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 453 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 454 } 455 c *= 4.0 // allowed slack - we don't need to be perfectly random 456 mean := .5 * REP 457 stddev := .5 * math.Sqrt(REP) 458 low := int(mean - c*stddev) 459 high := int(mean + c*stddev) 460 for i := 0; i < n; i++ { 461 for j := 0; j < hashSize; j++ { 462 x := grid[i][j] 463 if x < low || x > high { 464 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 465 } 466 } 467 } 468 } 469 470 // All bit rotations of a set of distinct keys 471 func TestSmhasherWindowed(t *testing.T) { 472 windowed(t, &Int32Key{}) 473 windowed(t, &Int64Key{}) 474 windowed(t, &BytesKey{make([]byte, 128)}) 475 } 476 func windowed(t *testing.T, k Key) { 477 if testing.Short() { 478 t.Skip("Skipping in short mode") 479 } 480 const BITS = 16 481 482 for r := 0; r < k.bits(); r++ { 483 h := newHashSet() 484 for i := 0; i < 1<<BITS; i++ { 485 k.clear() 486 for j := 0; j < BITS; j++ { 487 if i>>uint(j)&1 != 0 { 488 k.flipBit((j + r) % k.bits()) 489 } 490 } 491 h.add(k.hash()) 492 } 493 h.check(t) 494 } 495 } 496 497 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 498 func TestSmhasherText(t *testing.T) { 499 if testing.Short() { 500 t.Skip("Skipping in short mode") 501 } 502 text(t, "Foo", "Bar") 503 text(t, "FooBar", "") 504 text(t, "", "FooBar") 505 } 506 func text(t *testing.T, prefix, suffix string) { 507 const N = 4 508 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 509 const L = len(S) 510 b := make([]byte, len(prefix)+N+len(suffix)) 511 copy(b, prefix) 512 copy(b[len(prefix)+N:], suffix) 513 h := newHashSet() 514 c := b[len(prefix):] 515 for i := 0; i < L; i++ { 516 c[0] = S[i] 517 for j := 0; j < L; j++ { 518 c[1] = S[j] 519 for k := 0; k < L; k++ { 520 c[2] = S[k] 521 for x := 0; x < L; x++ { 522 c[3] = S[x] 523 h.addB(b) 524 } 525 } 526 } 527 } 528 h.check(t) 529 } 530 531 // Make sure different seed values generate different hashes. 532 func TestSmhasherSeed(t *testing.T) { 533 h := newHashSet() 534 const N = 100000 535 s := "hello" 536 for i := 0; i < N; i++ { 537 h.addS_seed(s, uintptr(i)) 538 } 539 h.check(t) 540 } 541 542 // size of the hash output (32 or 64 bits) 543 const hashSize = 32 + int(^uintptr(0)>>63<<5) 544 545 func randBytes(r *rand.Rand, b []byte) { 546 for i := range b { 547 b[i] = byte(r.Uint32()) 548 } 549 } 550 551 func benchmarkHash(b *testing.B, n int) { 552 s := strings.Repeat("A", n) 553 554 for i := 0; i < b.N; i++ { 555 StringHash(s, 0) 556 } 557 b.SetBytes(int64(n)) 558 } 559 560 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 561 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 562 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 563 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 564 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } 565 566 func TestArrayHash(t *testing.T) { 567 // Make sure that "" in arrays hash correctly. The hash 568 // should at least scramble the input seed so that, e.g., 569 // {"","foo"} and {"foo",""} have different hashes. 570 571 // If the hash is bad, then all (8 choose 4) = 70 keys 572 // have the same hash. If so, we allocate 70/8 = 8 573 // overflow buckets. If the hash is good we don't 574 // normally allocate any overflow buckets, and the 575 // probability of even one or two overflows goes down rapidly. 576 // (There is always 1 allocation of the bucket array. The map 577 // header is allocated on the stack.) 578 f := func() { 579 // Make the key type at most 128 bytes. Otherwise, 580 // we get an allocation per key. 581 type key [8]string 582 m := make(map[key]bool, 70) 583 584 // fill m with keys that have 4 "foo"s and 4 ""s. 585 for i := 0; i < 256; i++ { 586 var k key 587 cnt := 0 588 for j := uint(0); j < 8; j++ { 589 if i>>j&1 != 0 { 590 k[j] = "foo" 591 cnt++ 592 } 593 } 594 if cnt == 4 { 595 m[k] = true 596 } 597 } 598 if len(m) != 70 { 599 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 600 } 601 } 602 if n := testing.AllocsPerRun(10, f); n > 6 { 603 t.Errorf("too many allocs %f - hash not balanced", n) 604 } 605 } 606 func TestStructHash(t *testing.T) { 607 // See the comment in TestArrayHash. 608 f := func() { 609 type key struct { 610 a, b, c, d, e, f, g, h string 611 } 612 m := make(map[key]bool, 70) 613 614 // fill m with keys that have 4 "foo"s and 4 ""s. 615 for i := 0; i < 256; i++ { 616 var k key 617 cnt := 0 618 if i&1 != 0 { 619 k.a = "foo" 620 cnt++ 621 } 622 if i&2 != 0 { 623 k.b = "foo" 624 cnt++ 625 } 626 if i&4 != 0 { 627 k.c = "foo" 628 cnt++ 629 } 630 if i&8 != 0 { 631 k.d = "foo" 632 cnt++ 633 } 634 if i&16 != 0 { 635 k.e = "foo" 636 cnt++ 637 } 638 if i&32 != 0 { 639 k.f = "foo" 640 cnt++ 641 } 642 if i&64 != 0 { 643 k.g = "foo" 644 cnt++ 645 } 646 if i&128 != 0 { 647 k.h = "foo" 648 cnt++ 649 } 650 if cnt == 4 { 651 m[k] = true 652 } 653 } 654 if len(m) != 70 { 655 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 656 } 657 } 658 if n := testing.AllocsPerRun(10, f); n > 6 { 659 t.Errorf("too many allocs %f - hash not balanced", n) 660 } 661 } 662 663 var sink uint64 664 665 func BenchmarkAlignedLoad(b *testing.B) { 666 var buf [16]byte 667 p := unsafe.Pointer(&buf[0]) 668 var s uint64 669 for i := 0; i < b.N; i++ { 670 s += ReadUnaligned64(p) 671 } 672 sink = s 673 } 674 675 func BenchmarkUnalignedLoad(b *testing.B) { 676 var buf [16]byte 677 p := unsafe.Pointer(&buf[1]) 678 var s uint64 679 for i := 0; i < b.N; i++ { 680 s += ReadUnaligned64(p) 681 } 682 sink = s 683 } 684 685 func TestCollisions(t *testing.T) { 686 if testing.Short() { 687 t.Skip("Skipping in short mode") 688 } 689 for i := 0; i < 16; i++ { 690 for j := 0; j < 16; j++ { 691 if j == i { 692 continue 693 } 694 var a [16]byte 695 m := make(map[uint16]struct{}, 1<<16) 696 for n := 0; n < 1<<16; n++ { 697 a[i] = byte(n) 698 a[j] = byte(n >> 8) 699 m[uint16(BytesHash(a[:], 0))] = struct{}{} 700 } 701 if len(m) <= 1<<15 { 702 t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m)) 703 } 704 } 705 } 706 }