github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/runtime/hash_test.go (about) 1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 ) 15 16 // Smhasher is a torture test for hash functions. 17 // https://code.google.com/p/smhasher/ 18 // This code is a port of some of the Smhasher tests to Go. 19 // 20 // The current AES hash function passes Smhasher. Our fallback 21 // hash functions don't, so we only enable the difficult tests when 22 // we know the AES implementation is available. 23 24 // Sanity checks. 25 // hash should not depend on values outside key. 26 // hash should not depend on alignment. 27 func TestSmhasherSanity(t *testing.T) { 28 r := rand.New(rand.NewSource(1234)) 29 const REP = 10 30 const KEYMAX = 128 31 const PAD = 16 32 const OFFMAX = 16 33 for k := 0; k < REP; k++ { 34 for n := 0; n < KEYMAX; n++ { 35 for i := 0; i < OFFMAX; i++ { 36 var b [KEYMAX + OFFMAX + 2*PAD]byte 37 var c [KEYMAX + OFFMAX + 2*PAD]byte 38 randBytes(r, b[:]) 39 randBytes(r, c[:]) 40 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 41 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 42 t.Errorf("hash depends on bytes outside key") 43 } 44 } 45 } 46 } 47 } 48 49 type HashSet struct { 50 m map[uintptr]struct{} // set of hashes added 51 n int // number of hashes added 52 } 53 54 func newHashSet() *HashSet { 55 return &HashSet{make(map[uintptr]struct{}), 0} 56 } 57 func (s *HashSet) add(h uintptr) { 58 s.m[h] = struct{}{} 59 s.n++ 60 } 61 func (s *HashSet) addS(x string) { 62 s.add(StringHash(x, 0)) 63 } 64 func (s *HashSet) addB(x []byte) { 65 s.add(BytesHash(x, 0)) 66 } 67 func (s *HashSet) addS_seed(x string, seed uintptr) { 68 s.add(StringHash(x, seed)) 69 } 70 func (s *HashSet) check(t *testing.T) { 71 const SLOP = 10.0 72 collisions := s.n - len(s.m) 73 //fmt.Printf("%d/%d\n", len(s.m), s.n) 74 pairs := int64(s.n) * int64(s.n-1) / 2 75 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 76 stddev := math.Sqrt(expected) 77 if float64(collisions) > expected+SLOP*3*stddev { 78 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 79 } 80 } 81 82 // a string plus adding zeros must make distinct hashes 83 func TestSmhasherAppendedZeros(t *testing.T) { 84 s := "hello" + strings.Repeat("\x00", 256) 85 h := newHashSet() 86 for i := 0; i <= len(s); i++ { 87 h.addS(s[:i]) 88 } 89 h.check(t) 90 } 91 92 // All 0-3 byte strings have distinct hashes. 93 func TestSmhasherSmallKeys(t *testing.T) { 94 h := newHashSet() 95 var b [3]byte 96 for i := 0; i < 256; i++ { 97 b[0] = byte(i) 98 h.addB(b[:1]) 99 for j := 0; j < 256; j++ { 100 b[1] = byte(j) 101 h.addB(b[:2]) 102 if !testing.Short() { 103 for k := 0; k < 256; k++ { 104 b[2] = byte(k) 105 h.addB(b[:3]) 106 } 107 } 108 } 109 } 110 h.check(t) 111 } 112 113 // Different length strings of all zeros have distinct hashes. 114 func TestSmhasherZeros(t *testing.T) { 115 N := 256 * 1024 116 if testing.Short() { 117 N = 1024 118 } 119 h := newHashSet() 120 b := make([]byte, N) 121 for i := 0; i <= N; i++ { 122 h.addB(b[:i]) 123 } 124 h.check(t) 125 } 126 127 // Strings with up to two nonzero bytes all have distinct hashes. 128 func TestSmhasherTwoNonzero(t *testing.T) { 129 if testing.Short() { 130 t.Skip("Skipping in short mode") 131 } 132 h := newHashSet() 133 for n := 2; n <= 16; n++ { 134 twoNonZero(h, n) 135 } 136 h.check(t) 137 } 138 func twoNonZero(h *HashSet, n int) { 139 b := make([]byte, n) 140 141 // all zero 142 h.addB(b[:]) 143 144 // one non-zero byte 145 for i := 0; i < n; i++ { 146 for x := 1; x < 256; x++ { 147 b[i] = byte(x) 148 h.addB(b[:]) 149 b[i] = 0 150 } 151 } 152 153 // two non-zero bytes 154 for i := 0; i < n; i++ { 155 for x := 1; x < 256; x++ { 156 b[i] = byte(x) 157 for j := i + 1; j < n; j++ { 158 for y := 1; y < 256; y++ { 159 b[j] = byte(y) 160 h.addB(b[:]) 161 b[j] = 0 162 } 163 } 164 b[i] = 0 165 } 166 } 167 } 168 169 // Test strings with repeats, like "abcdabcdabcdabcd..." 170 func TestSmhasherCyclic(t *testing.T) { 171 if testing.Short() { 172 t.Skip("Skipping in short mode") 173 } 174 if !HaveGoodHash() { 175 t.Skip("fallback hash not good enough for this test") 176 } 177 r := rand.New(rand.NewSource(1234)) 178 const REPEAT = 8 179 const N = 1000000 180 for n := 4; n <= 12; n++ { 181 h := newHashSet() 182 b := make([]byte, REPEAT*n) 183 for i := 0; i < N; i++ { 184 b[0] = byte(i * 79 % 97) 185 b[1] = byte(i * 43 % 137) 186 b[2] = byte(i * 151 % 197) 187 b[3] = byte(i * 199 % 251) 188 randBytes(r, b[4:n]) 189 for j := n; j < n*REPEAT; j++ { 190 b[j] = b[j-n] 191 } 192 h.addB(b) 193 } 194 h.check(t) 195 } 196 } 197 198 // Test strings with only a few bits set 199 func TestSmhasherSparse(t *testing.T) { 200 if testing.Short() { 201 t.Skip("Skipping in short mode") 202 } 203 sparse(t, 32, 6) 204 sparse(t, 40, 6) 205 sparse(t, 48, 5) 206 sparse(t, 56, 5) 207 sparse(t, 64, 5) 208 sparse(t, 96, 4) 209 sparse(t, 256, 3) 210 sparse(t, 2048, 2) 211 } 212 func sparse(t *testing.T, n int, k int) { 213 b := make([]byte, n/8) 214 h := newHashSet() 215 setbits(h, b, 0, k) 216 h.check(t) 217 } 218 219 // set up to k bits at index i and greater 220 func setbits(h *HashSet, b []byte, i int, k int) { 221 h.addB(b) 222 if k == 0 { 223 return 224 } 225 for j := i; j < len(b)*8; j++ { 226 b[j/8] |= byte(1 << uint(j&7)) 227 setbits(h, b, j+1, k-1) 228 b[j/8] &= byte(^(1 << uint(j&7))) 229 } 230 } 231 232 // Test all possible combinations of n blocks from the set s. 233 // "permutation" is a bad name here, but it is what Smhasher uses. 234 func TestSmhasherPermutation(t *testing.T) { 235 if testing.Short() { 236 t.Skip("Skipping in short mode") 237 } 238 if !HaveGoodHash() { 239 t.Skip("fallback hash not good enough for this test") 240 } 241 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 242 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 243 permutation(t, []uint32{0, 1}, 20) 244 permutation(t, []uint32{0, 1 << 31}, 20) 245 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 246 } 247 func permutation(t *testing.T, s []uint32, n int) { 248 b := make([]byte, n*4) 249 h := newHashSet() 250 genPerm(h, b, s, 0) 251 h.check(t) 252 } 253 func genPerm(h *HashSet, b []byte, s []uint32, n int) { 254 h.addB(b[:n]) 255 if n == len(b) { 256 return 257 } 258 for _, v := range s { 259 b[n] = byte(v) 260 b[n+1] = byte(v >> 8) 261 b[n+2] = byte(v >> 16) 262 b[n+3] = byte(v >> 24) 263 genPerm(h, b, s, n+4) 264 } 265 } 266 267 type Key interface { 268 clear() // set bits all to 0 269 random(r *rand.Rand) // set key to something random 270 bits() int // how many bits key has 271 flipBit(i int) // flip bit i of the key 272 hash() uintptr // hash the key 273 name() string // for error reporting 274 } 275 276 type BytesKey struct { 277 b []byte 278 } 279 280 func (k *BytesKey) clear() { 281 for i := range k.b { 282 k.b[i] = 0 283 } 284 } 285 func (k *BytesKey) random(r *rand.Rand) { 286 randBytes(r, k.b) 287 } 288 func (k *BytesKey) bits() int { 289 return len(k.b) * 8 290 } 291 func (k *BytesKey) flipBit(i int) { 292 k.b[i>>3] ^= byte(1 << uint(i&7)) 293 } 294 func (k *BytesKey) hash() uintptr { 295 return BytesHash(k.b, 0) 296 } 297 func (k *BytesKey) name() string { 298 return fmt.Sprintf("bytes%d", len(k.b)) 299 } 300 301 type Int32Key struct { 302 i uint32 303 } 304 305 func (k *Int32Key) clear() { 306 k.i = 0 307 } 308 func (k *Int32Key) random(r *rand.Rand) { 309 k.i = r.Uint32() 310 } 311 func (k *Int32Key) bits() int { 312 return 32 313 } 314 func (k *Int32Key) flipBit(i int) { 315 k.i ^= 1 << uint(i) 316 } 317 func (k *Int32Key) hash() uintptr { 318 return Int32Hash(k.i, 0) 319 } 320 func (k *Int32Key) name() string { 321 return "int32" 322 } 323 324 type Int64Key struct { 325 i uint64 326 } 327 328 func (k *Int64Key) clear() { 329 k.i = 0 330 } 331 func (k *Int64Key) random(r *rand.Rand) { 332 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 333 } 334 func (k *Int64Key) bits() int { 335 return 64 336 } 337 func (k *Int64Key) flipBit(i int) { 338 k.i ^= 1 << uint(i) 339 } 340 func (k *Int64Key) hash() uintptr { 341 return Int64Hash(k.i, 0) 342 } 343 func (k *Int64Key) name() string { 344 return "int64" 345 } 346 347 // Flipping a single bit of a key should flip each output bit with 50% probability. 348 func TestSmhasherAvalanche(t *testing.T) { 349 if !HaveGoodHash() { 350 t.Skip("fallback hash not good enough for this test") 351 } 352 if testing.Short() { 353 t.Skip("Skipping in short mode") 354 } 355 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 356 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 357 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 358 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 359 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 360 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 361 avalancheTest1(t, &Int32Key{}) 362 avalancheTest1(t, &Int64Key{}) 363 } 364 func avalancheTest1(t *testing.T, k Key) { 365 const REP = 100000 366 r := rand.New(rand.NewSource(1234)) 367 n := k.bits() 368 369 // grid[i][j] is a count of whether flipping 370 // input bit i affects output bit j. 371 grid := make([][hashSize]int, n) 372 373 for z := 0; z < REP; z++ { 374 // pick a random key, hash it 375 k.random(r) 376 h := k.hash() 377 378 // flip each bit, hash & compare the results 379 for i := 0; i < n; i++ { 380 k.flipBit(i) 381 d := h ^ k.hash() 382 k.flipBit(i) 383 384 // record the effects of that bit flip 385 g := &grid[i] 386 for j := 0; j < hashSize; j++ { 387 g[j] += int(d & 1) 388 d >>= 1 389 } 390 } 391 } 392 393 // Each entry in the grid should be about REP/2. 394 // More precisely, we did N = k.bits() * hashSize experiments where 395 // each is the sum of REP coin flips. We want to find bounds on the 396 // sum of coin flips such that a truly random experiment would have 397 // all sums inside those bounds with 99% probability. 398 N := n * hashSize 399 var c float64 400 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 401 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 402 } 403 c *= 4.0 // allowed slack - we don't need to be perfectly random 404 mean := .5 * REP 405 stddev := .5 * math.Sqrt(REP) 406 low := int(mean - c*stddev) 407 high := int(mean + c*stddev) 408 for i := 0; i < n; i++ { 409 for j := 0; j < hashSize; j++ { 410 x := grid[i][j] 411 if x < low || x > high { 412 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 413 } 414 } 415 } 416 } 417 418 // All bit rotations of a set of distinct keys 419 func TestSmhasherWindowed(t *testing.T) { 420 windowed(t, &Int32Key{}) 421 windowed(t, &Int64Key{}) 422 windowed(t, &BytesKey{make([]byte, 128)}) 423 } 424 func windowed(t *testing.T, k Key) { 425 if testing.Short() { 426 t.Skip("Skipping in short mode") 427 } 428 const BITS = 16 429 430 for r := 0; r < k.bits(); r++ { 431 h := newHashSet() 432 for i := 0; i < 1<<BITS; i++ { 433 k.clear() 434 for j := 0; j < BITS; j++ { 435 if i>>uint(j)&1 != 0 { 436 k.flipBit((j + r) % k.bits()) 437 } 438 } 439 h.add(k.hash()) 440 } 441 h.check(t) 442 } 443 } 444 445 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 446 func TestSmhasherText(t *testing.T) { 447 if testing.Short() { 448 t.Skip("Skipping in short mode") 449 } 450 text(t, "Foo", "Bar") 451 text(t, "FooBar", "") 452 text(t, "", "FooBar") 453 } 454 func text(t *testing.T, prefix, suffix string) { 455 const N = 4 456 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 457 const L = len(S) 458 b := make([]byte, len(prefix)+N+len(suffix)) 459 copy(b, prefix) 460 copy(b[len(prefix)+N:], suffix) 461 h := newHashSet() 462 c := b[len(prefix):] 463 for i := 0; i < L; i++ { 464 c[0] = S[i] 465 for j := 0; j < L; j++ { 466 c[1] = S[j] 467 for k := 0; k < L; k++ { 468 c[2] = S[k] 469 for x := 0; x < L; x++ { 470 c[3] = S[x] 471 h.addB(b) 472 } 473 } 474 } 475 } 476 h.check(t) 477 } 478 479 // Make sure different seed values generate different hashes. 480 func TestSmhasherSeed(t *testing.T) { 481 h := newHashSet() 482 const N = 100000 483 s := "hello" 484 for i := 0; i < N; i++ { 485 h.addS_seed(s, uintptr(i)) 486 } 487 h.check(t) 488 } 489 490 // size of the hash output (32 or 64 bits) 491 const hashSize = 32 + int(^uintptr(0)>>63<<5) 492 493 func randBytes(r *rand.Rand, b []byte) { 494 for i := range b { 495 b[i] = byte(r.Uint32()) 496 } 497 } 498 499 func benchmarkHash(b *testing.B, n int) { 500 s := strings.Repeat("A", n) 501 502 for i := 0; i < b.N; i++ { 503 StringHash(s, 0) 504 } 505 b.SetBytes(int64(n)) 506 } 507 508 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 509 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 510 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 511 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 512 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }