github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/runtime/hash_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 ) 15 16 // Smhasher is a torture test for hash functions. 17 // https://code.google.com/p/smhasher/ 18 // This code is a port of some of the Smhasher tests to Go. 19 // 20 // The current AES hash function passes Smhasher. Our fallback 21 // hash functions don't, so we only enable the difficult tests when 22 // we know the AES implementation is available. 23 24 // Sanity checks. 25 // hash should not depend on values outside key. 26 // hash should not depend on alignment. 27 func TestSmhasherSanity(t *testing.T) { 28 r := rand.New(rand.NewSource(1234)) 29 const REP = 10 30 const KEYMAX = 128 31 const PAD = 16 32 const OFFMAX = 16 33 for k := 0; k < REP; k++ { 34 for n := 0; n < KEYMAX; n++ { 35 for i := 0; i < OFFMAX; i++ { 36 var b [KEYMAX + OFFMAX + 2*PAD]byte 37 var c [KEYMAX + OFFMAX + 2*PAD]byte 38 randBytes(r, b[:]) 39 randBytes(r, c[:]) 40 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 41 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 42 t.Errorf("hash depends on bytes outside key") 43 } 44 } 45 } 46 } 47 } 48 49 type HashSet struct { 50 m map[uintptr]struct{} // set of hashes added 51 n int // number of hashes added 52 } 53 54 func newHashSet() *HashSet { 55 return &HashSet{make(map[uintptr]struct{}), 0} 56 } 57 func (s *HashSet) add(h uintptr) { 58 s.m[h] = struct{}{} 59 s.n++ 60 } 61 func (s *HashSet) addS(x string) { 62 s.add(StringHash(x, 0)) 63 } 64 func (s *HashSet) addB(x []byte) { 65 s.add(BytesHash(x, 0)) 66 } 67 func (s *HashSet) addS_seed(x string, seed uintptr) { 68 s.add(StringHash(x, seed)) 69 } 70 func (s *HashSet) check(t *testing.T) { 71 const SLOP = 10.0 72 collisions := s.n - len(s.m) 73 //fmt.Printf("%d/%d\n", len(s.m), s.n) 74 pairs := int64(s.n) * int64(s.n-1) / 2 75 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 76 stddev := math.Sqrt(expected) 77 if float64(collisions) > expected+SLOP*3*stddev { 78 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 79 } 80 } 81 82 // a string plus adding zeros must make distinct hashes 83 func TestSmhasherAppendedZeros(t *testing.T) { 84 s := "hello" + strings.Repeat("\x00", 256) 85 h := newHashSet() 86 for i := 0; i <= len(s); i++ { 87 h.addS(s[:i]) 88 } 89 h.check(t) 90 } 91 92 // All 0-3 byte strings have distinct hashes. 93 func TestSmhasherSmallKeys(t *testing.T) { 94 h := newHashSet() 95 var b [3]byte 96 for i := 0; i < 256; i++ { 97 b[0] = byte(i) 98 h.addB(b[:1]) 99 for j := 0; j < 256; j++ { 100 b[1] = byte(j) 101 h.addB(b[:2]) 102 if !testing.Short() { 103 for k := 0; k < 256; k++ { 104 b[2] = byte(k) 105 h.addB(b[:3]) 106 } 107 } 108 } 109 } 110 h.check(t) 111 } 112 113 // Different length strings of all zeros have distinct hashes. 114 func TestSmhasherZeros(t *testing.T) { 115 N := 256 * 1024 116 if testing.Short() { 117 N = 1024 118 } 119 h := newHashSet() 120 b := make([]byte, N) 121 for i := 0; i <= N; i++ { 122 h.addB(b[:i]) 123 } 124 h.check(t) 125 } 126 127 // Strings with up to two nonzero bytes all have distinct hashes. 128 func TestSmhasherTwoNonzero(t *testing.T) { 129 if testing.Short() { 130 t.Skip("Skipping in short mode") 131 } 132 h := newHashSet() 133 for n := 2; n <= 16; n++ { 134 twoNonZero(h, n) 135 } 136 h.check(t) 137 } 138 func twoNonZero(h *HashSet, n int) { 139 b := make([]byte, n) 140 141 // all zero 142 h.addB(b[:]) 143 144 // one non-zero byte 145 for i := 0; i < n; i++ { 146 for x := 1; x < 256; x++ { 147 b[i] = byte(x) 148 h.addB(b[:]) 149 b[i] = 0 150 } 151 } 152 153 // two non-zero bytes 154 for i := 0; i < n; i++ { 155 for x := 1; x < 256; x++ { 156 b[i] = byte(x) 157 for j := i + 1; j < n; j++ { 158 for y := 1; y < 256; y++ { 159 b[j] = byte(y) 160 h.addB(b[:]) 161 b[j] = 0 162 } 163 } 164 b[i] = 0 165 } 166 } 167 } 168 169 // Test strings with repeats, like "abcdabcdabcdabcd..." 170 func TestSmhasherCyclic(t *testing.T) { 171 if testing.Short() { 172 t.Skip("Skipping in short mode") 173 } 174 if !HaveGoodHash() { 175 t.Skip("fallback hash not good enough for this test") 176 } 177 r := rand.New(rand.NewSource(1234)) 178 const REPEAT = 8 179 const N = 1000000 180 for n := 4; n <= 12; n++ { 181 h := newHashSet() 182 b := make([]byte, REPEAT*n) 183 for i := 0; i < N; i++ { 184 b[0] = byte(i * 79 % 97) 185 b[1] = byte(i * 43 % 137) 186 b[2] = byte(i * 151 % 197) 187 b[3] = byte(i * 199 % 251) 188 randBytes(r, b[4:n]) 189 for j := n; j < n*REPEAT; j++ { 190 b[j] = b[j-n] 191 } 192 h.addB(b) 193 } 194 h.check(t) 195 } 196 } 197 198 // Test strings with only a few bits set 199 func TestSmhasherSparse(t *testing.T) { 200 if testing.Short() { 201 t.Skip("Skipping in short mode") 202 } 203 sparse(t, 32, 6) 204 sparse(t, 40, 6) 205 sparse(t, 48, 5) 206 sparse(t, 56, 5) 207 sparse(t, 64, 5) 208 sparse(t, 96, 4) 209 sparse(t, 256, 3) 210 sparse(t, 2048, 2) 211 } 212 func sparse(t *testing.T, n int, k int) { 213 b := make([]byte, n/8) 214 h := newHashSet() 215 setbits(h, b, 0, k) 216 h.check(t) 217 } 218 219 // set up to k bits at index i and greater 220 func setbits(h *HashSet, b []byte, i int, k int) { 221 h.addB(b) 222 if k == 0 { 223 return 224 } 225 for j := i; j < len(b)*8; j++ { 226 b[j/8] |= byte(1 << uint(j&7)) 227 setbits(h, b, j+1, k-1) 228 b[j/8] &= byte(^(1 << uint(j&7))) 229 } 230 } 231 232 // Test all possible combinations of n blocks from the set s. 233 // "permutation" is a bad name here, but it is what Smhasher uses. 234 func TestSmhasherPermutation(t *testing.T) { 235 if testing.Short() { 236 t.Skip("Skipping in short mode") 237 } 238 if !HaveGoodHash() { 239 t.Skip("fallback hash not good enough for this test") 240 } 241 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 242 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 243 permutation(t, []uint32{0, 1}, 20) 244 permutation(t, []uint32{0, 1 << 31}, 20) 245 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 246 } 247 func permutation(t *testing.T, s []uint32, n int) { 248 b := make([]byte, n*4) 249 h := newHashSet() 250 genPerm(h, b, s, 0) 251 h.check(t) 252 } 253 func genPerm(h *HashSet, b []byte, s []uint32, n int) { 254 h.addB(b[:n]) 255 if n == len(b) { 256 return 257 } 258 for _, v := range s { 259 b[n] = byte(v) 260 b[n+1] = byte(v >> 8) 261 b[n+2] = byte(v >> 16) 262 b[n+3] = byte(v >> 24) 263 genPerm(h, b, s, n+4) 264 } 265 } 266 267 type Key interface { 268 clear() // set bits all to 0 269 random(r *rand.Rand) // set key to something random 270 bits() int // how many bits key has 271 flipBit(i int) // flip bit i of the key 272 hash() uintptr // hash the key 273 name() string // for error reporting 274 } 275 276 type BytesKey struct { 277 b []byte 278 } 279 280 func (k *BytesKey) clear() { 281 for i := range k.b { 282 k.b[i] = 0 283 } 284 } 285 func (k *BytesKey) random(r *rand.Rand) { 286 randBytes(r, k.b) 287 } 288 func (k *BytesKey) bits() int { 289 return len(k.b) * 8 290 } 291 func (k *BytesKey) flipBit(i int) { 292 k.b[i>>3] ^= byte(1 << uint(i&7)) 293 } 294 func (k *BytesKey) hash() uintptr { 295 return BytesHash(k.b, 0) 296 } 297 func (k *BytesKey) name() string { 298 return fmt.Sprintf("bytes%d", len(k.b)) 299 } 300 301 type Int32Key struct { 302 i uint32 303 } 304 305 func (k *Int32Key) clear() { 306 k.i = 0 307 } 308 func (k *Int32Key) random(r *rand.Rand) { 309 k.i = r.Uint32() 310 } 311 func (k *Int32Key) bits() int { 312 return 32 313 } 314 func (k *Int32Key) flipBit(i int) { 315 k.i ^= 1 << uint(i) 316 } 317 func (k *Int32Key) hash() uintptr { 318 return Int32Hash(k.i, 0) 319 } 320 func (k *Int32Key) name() string { 321 return "int32" 322 } 323 324 type Int64Key struct { 325 i uint64 326 } 327 328 func (k *Int64Key) clear() { 329 k.i = 0 330 } 331 func (k *Int64Key) random(r *rand.Rand) { 332 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 333 } 334 func (k *Int64Key) bits() int { 335 return 64 336 } 337 func (k *Int64Key) flipBit(i int) { 338 k.i ^= 1 << uint(i) 339 } 340 func (k *Int64Key) hash() uintptr { 341 return Int64Hash(k.i, 0) 342 } 343 func (k *Int64Key) name() string { 344 return "int64" 345 } 346 347 type EfaceKey struct { 348 i interface{} 349 } 350 351 func (k *EfaceKey) clear() { 352 k.i = nil 353 } 354 func (k *EfaceKey) random(r *rand.Rand) { 355 k.i = uint64(r.Int63()) 356 } 357 func (k *EfaceKey) bits() int { 358 // use 64 bits. This tests inlined interfaces 359 // on 64-bit targets and indirect interfaces on 360 // 32-bit targets. 361 return 64 362 } 363 func (k *EfaceKey) flipBit(i int) { 364 k.i = k.i.(uint64) ^ uint64(1)<<uint(i) 365 } 366 func (k *EfaceKey) hash() uintptr { 367 return EfaceHash(k.i, 0) 368 } 369 func (k *EfaceKey) name() string { 370 return "Eface" 371 } 372 373 type IfaceKey struct { 374 i interface { 375 F() 376 } 377 } 378 type fInter uint64 379 380 func (x fInter) F() { 381 } 382 383 func (k *IfaceKey) clear() { 384 k.i = nil 385 } 386 func (k *IfaceKey) random(r *rand.Rand) { 387 k.i = fInter(r.Int63()) 388 } 389 func (k *IfaceKey) bits() int { 390 // use 64 bits. This tests inlined interfaces 391 // on 64-bit targets and indirect interfaces on 392 // 32-bit targets. 393 return 64 394 } 395 func (k *IfaceKey) flipBit(i int) { 396 k.i = k.i.(fInter) ^ fInter(1)<<uint(i) 397 } 398 func (k *IfaceKey) hash() uintptr { 399 return IfaceHash(k.i, 0) 400 } 401 func (k *IfaceKey) name() string { 402 return "Iface" 403 } 404 405 // Flipping a single bit of a key should flip each output bit with 50% probability. 406 func TestSmhasherAvalanche(t *testing.T) { 407 if !HaveGoodHash() { 408 t.Skip("fallback hash not good enough for this test") 409 } 410 if testing.Short() { 411 t.Skip("Skipping in short mode") 412 } 413 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 414 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 415 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 416 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 417 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 418 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 419 avalancheTest1(t, &Int32Key{}) 420 avalancheTest1(t, &Int64Key{}) 421 avalancheTest1(t, &EfaceKey{}) 422 avalancheTest1(t, &IfaceKey{}) 423 } 424 func avalancheTest1(t *testing.T, k Key) { 425 const REP = 100000 426 r := rand.New(rand.NewSource(1234)) 427 n := k.bits() 428 429 // grid[i][j] is a count of whether flipping 430 // input bit i affects output bit j. 431 grid := make([][hashSize]int, n) 432 433 for z := 0; z < REP; z++ { 434 // pick a random key, hash it 435 k.random(r) 436 h := k.hash() 437 438 // flip each bit, hash & compare the results 439 for i := 0; i < n; i++ { 440 k.flipBit(i) 441 d := h ^ k.hash() 442 k.flipBit(i) 443 444 // record the effects of that bit flip 445 g := &grid[i] 446 for j := 0; j < hashSize; j++ { 447 g[j] += int(d & 1) 448 d >>= 1 449 } 450 } 451 } 452 453 // Each entry in the grid should be about REP/2. 454 // More precisely, we did N = k.bits() * hashSize experiments where 455 // each is the sum of REP coin flips. We want to find bounds on the 456 // sum of coin flips such that a truly random experiment would have 457 // all sums inside those bounds with 99% probability. 458 N := n * hashSize 459 var c float64 460 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 461 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 462 } 463 c *= 4.0 // allowed slack - we don't need to be perfectly random 464 mean := .5 * REP 465 stddev := .5 * math.Sqrt(REP) 466 low := int(mean - c*stddev) 467 high := int(mean + c*stddev) 468 for i := 0; i < n; i++ { 469 for j := 0; j < hashSize; j++ { 470 x := grid[i][j] 471 if x < low || x > high { 472 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 473 } 474 } 475 } 476 } 477 478 // All bit rotations of a set of distinct keys 479 func TestSmhasherWindowed(t *testing.T) { 480 windowed(t, &Int32Key{}) 481 windowed(t, &Int64Key{}) 482 windowed(t, &BytesKey{make([]byte, 128)}) 483 } 484 func windowed(t *testing.T, k Key) { 485 if testing.Short() { 486 t.Skip("Skipping in short mode") 487 } 488 const BITS = 16 489 490 for r := 0; r < k.bits(); r++ { 491 h := newHashSet() 492 for i := 0; i < 1<<BITS; i++ { 493 k.clear() 494 for j := 0; j < BITS; j++ { 495 if i>>uint(j)&1 != 0 { 496 k.flipBit((j + r) % k.bits()) 497 } 498 } 499 h.add(k.hash()) 500 } 501 h.check(t) 502 } 503 } 504 505 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 506 func TestSmhasherText(t *testing.T) { 507 if testing.Short() { 508 t.Skip("Skipping in short mode") 509 } 510 text(t, "Foo", "Bar") 511 text(t, "FooBar", "") 512 text(t, "", "FooBar") 513 } 514 func text(t *testing.T, prefix, suffix string) { 515 const N = 4 516 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 517 const L = len(S) 518 b := make([]byte, len(prefix)+N+len(suffix)) 519 copy(b, prefix) 520 copy(b[len(prefix)+N:], suffix) 521 h := newHashSet() 522 c := b[len(prefix):] 523 for i := 0; i < L; i++ { 524 c[0] = S[i] 525 for j := 0; j < L; j++ { 526 c[1] = S[j] 527 for k := 0; k < L; k++ { 528 c[2] = S[k] 529 for x := 0; x < L; x++ { 530 c[3] = S[x] 531 h.addB(b) 532 } 533 } 534 } 535 } 536 h.check(t) 537 } 538 539 // Make sure different seed values generate different hashes. 540 func TestSmhasherSeed(t *testing.T) { 541 h := newHashSet() 542 const N = 100000 543 s := "hello" 544 for i := 0; i < N; i++ { 545 h.addS_seed(s, uintptr(i)) 546 } 547 h.check(t) 548 } 549 550 // size of the hash output (32 or 64 bits) 551 const hashSize = 32 + int(^uintptr(0)>>63<<5) 552 553 func randBytes(r *rand.Rand, b []byte) { 554 for i := range b { 555 b[i] = byte(r.Uint32()) 556 } 557 } 558 559 func benchmarkHash(b *testing.B, n int) { 560 s := strings.Repeat("A", n) 561 562 for i := 0; i < b.N; i++ { 563 StringHash(s, 0) 564 } 565 b.SetBytes(int64(n)) 566 } 567 568 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 569 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 570 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 571 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 572 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }