github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/hash/maphash/smhasher_test.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build !race 6 7 package maphash 8 9 import ( 10 "fmt" 11 "math" 12 "math/rand" 13 "runtime" 14 "strings" 15 "testing" 16 "unsafe" 17 ) 18 19 // Smhasher is a torture test for hash functions. 20 // https://code.google.com/p/smhasher/ 21 // This code is a port of some of the Smhasher tests to Go. 22 23 // Note: due to the long running time of these tests, they are 24 // currently disabled in -race mode. 25 26 var fixedSeed = MakeSeed() 27 28 // Sanity checks. 29 // hash should not depend on values outside key. 30 // hash should not depend on alignment. 31 func TestSmhasherSanity(t *testing.T) { 32 r := rand.New(rand.NewSource(1234)) 33 const REP = 10 34 const KEYMAX = 128 35 const PAD = 16 36 const OFFMAX = 16 37 for k := 0; k < REP; k++ { 38 for n := 0; n < KEYMAX; n++ { 39 for i := 0; i < OFFMAX; i++ { 40 var b [KEYMAX + OFFMAX + 2*PAD]byte 41 var c [KEYMAX + OFFMAX + 2*PAD]byte 42 randBytes(r, b[:]) 43 randBytes(r, c[:]) 44 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 45 if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) { 46 t.Errorf("hash depends on bytes outside key") 47 } 48 } 49 } 50 } 51 } 52 53 func bytesHash(b []byte) uint64 { 54 var h Hash 55 h.SetSeed(fixedSeed) 56 h.Write(b) 57 return h.Sum64() 58 } 59 func stringHash(s string) uint64 { 60 var h Hash 61 h.SetSeed(fixedSeed) 62 h.WriteString(s) 63 return h.Sum64() 64 } 65 66 const hashSize = 64 67 68 func randBytes(r *rand.Rand, b []byte) { 69 r.Read(b) // can't fail 70 } 71 72 // A hashSet measures the frequency of hash collisions. 73 type hashSet struct { 74 m map[uint64]struct{} // set of hashes added 75 n int // number of hashes added 76 } 77 78 func newHashSet() *hashSet { 79 return &hashSet{make(map[uint64]struct{}), 0} 80 } 81 func (s *hashSet) add(h uint64) { 82 s.m[h] = struct{}{} 83 s.n++ 84 } 85 func (s *hashSet) addS(x string) { 86 s.add(stringHash(x)) 87 } 88 func (s *hashSet) addB(x []byte) { 89 s.add(bytesHash(x)) 90 } 91 func (s *hashSet) addS_seed(x string, seed Seed) { 92 var h Hash 93 h.SetSeed(seed) 94 h.WriteString(x) 95 s.add(h.Sum64()) 96 } 97 func (s *hashSet) check(t *testing.T) { 98 const SLOP = 10.0 99 collisions := s.n - len(s.m) 100 pairs := int64(s.n) * int64(s.n-1) / 2 101 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 102 stddev := math.Sqrt(expected) 103 if float64(collisions) > expected+SLOP*(3*stddev+1) { 104 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 105 } 106 } 107 108 // a string plus adding zeros must make distinct hashes 109 func TestSmhasherAppendedZeros(t *testing.T) { 110 s := "hello" + strings.Repeat("\x00", 256) 111 h := newHashSet() 112 for i := 0; i <= len(s); i++ { 113 h.addS(s[:i]) 114 } 115 h.check(t) 116 } 117 118 // All 0-3 byte strings have distinct hashes. 119 func TestSmhasherSmallKeys(t *testing.T) { 120 h := newHashSet() 121 var b [3]byte 122 for i := 0; i < 256; i++ { 123 b[0] = byte(i) 124 h.addB(b[:1]) 125 for j := 0; j < 256; j++ { 126 b[1] = byte(j) 127 h.addB(b[:2]) 128 if !testing.Short() { 129 for k := 0; k < 256; k++ { 130 b[2] = byte(k) 131 h.addB(b[:3]) 132 } 133 } 134 } 135 } 136 h.check(t) 137 } 138 139 // Different length strings of all zeros have distinct hashes. 140 func TestSmhasherZeros(t *testing.T) { 141 N := 256 * 1024 142 if testing.Short() { 143 N = 1024 144 } 145 h := newHashSet() 146 b := make([]byte, N) 147 for i := 0; i <= N; i++ { 148 h.addB(b[:i]) 149 } 150 h.check(t) 151 } 152 153 // Strings with up to two nonzero bytes all have distinct hashes. 154 func TestSmhasherTwoNonzero(t *testing.T) { 155 if runtime.GOARCH == "wasm" { 156 t.Skip("Too slow on wasm") 157 } 158 if testing.Short() { 159 t.Skip("Skipping in short mode") 160 } 161 h := newHashSet() 162 for n := 2; n <= 16; n++ { 163 twoNonZero(h, n) 164 } 165 h.check(t) 166 } 167 func twoNonZero(h *hashSet, n int) { 168 b := make([]byte, n) 169 170 // all zero 171 h.addB(b) 172 173 // one non-zero byte 174 for i := 0; i < n; i++ { 175 for x := 1; x < 256; x++ { 176 b[i] = byte(x) 177 h.addB(b) 178 b[i] = 0 179 } 180 } 181 182 // two non-zero bytes 183 for i := 0; i < n; i++ { 184 for x := 1; x < 256; x++ { 185 b[i] = byte(x) 186 for j := i + 1; j < n; j++ { 187 for y := 1; y < 256; y++ { 188 b[j] = byte(y) 189 h.addB(b) 190 b[j] = 0 191 } 192 } 193 b[i] = 0 194 } 195 } 196 } 197 198 // Test strings with repeats, like "abcdabcdabcdabcd..." 199 func TestSmhasherCyclic(t *testing.T) { 200 if testing.Short() { 201 t.Skip("Skipping in short mode") 202 } 203 r := rand.New(rand.NewSource(1234)) 204 const REPEAT = 8 205 const N = 1000000 206 for n := 4; n <= 12; n++ { 207 h := newHashSet() 208 b := make([]byte, REPEAT*n) 209 for i := 0; i < N; i++ { 210 b[0] = byte(i * 79 % 97) 211 b[1] = byte(i * 43 % 137) 212 b[2] = byte(i * 151 % 197) 213 b[3] = byte(i * 199 % 251) 214 randBytes(r, b[4:n]) 215 for j := n; j < n*REPEAT; j++ { 216 b[j] = b[j-n] 217 } 218 h.addB(b) 219 } 220 h.check(t) 221 } 222 } 223 224 // Test strings with only a few bits set 225 func TestSmhasherSparse(t *testing.T) { 226 if runtime.GOARCH == "wasm" { 227 t.Skip("Too slow on wasm") 228 } 229 if testing.Short() { 230 t.Skip("Skipping in short mode") 231 } 232 sparse(t, 32, 6) 233 sparse(t, 40, 6) 234 sparse(t, 48, 5) 235 sparse(t, 56, 5) 236 sparse(t, 64, 5) 237 sparse(t, 96, 4) 238 sparse(t, 256, 3) 239 sparse(t, 2048, 2) 240 } 241 func sparse(t *testing.T, n int, k int) { 242 b := make([]byte, n/8) 243 h := newHashSet() 244 setbits(h, b, 0, k) 245 h.check(t) 246 } 247 248 // set up to k bits at index i and greater 249 func setbits(h *hashSet, b []byte, i int, k int) { 250 h.addB(b) 251 if k == 0 { 252 return 253 } 254 for j := i; j < len(b)*8; j++ { 255 b[j/8] |= byte(1 << uint(j&7)) 256 setbits(h, b, j+1, k-1) 257 b[j/8] &= byte(^(1 << uint(j&7))) 258 } 259 } 260 261 // Test all possible combinations of n blocks from the set s. 262 // "permutation" is a bad name here, but it is what Smhasher uses. 263 func TestSmhasherPermutation(t *testing.T) { 264 if runtime.GOARCH == "wasm" { 265 t.Skip("Too slow on wasm") 266 } 267 if testing.Short() { 268 t.Skip("Skipping in short mode") 269 } 270 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 271 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 272 permutation(t, []uint32{0, 1}, 20) 273 permutation(t, []uint32{0, 1 << 31}, 20) 274 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 275 } 276 func permutation(t *testing.T, s []uint32, n int) { 277 b := make([]byte, n*4) 278 h := newHashSet() 279 genPerm(h, b, s, 0) 280 h.check(t) 281 } 282 func genPerm(h *hashSet, b []byte, s []uint32, n int) { 283 h.addB(b[:n]) 284 if n == len(b) { 285 return 286 } 287 for _, v := range s { 288 b[n] = byte(v) 289 b[n+1] = byte(v >> 8) 290 b[n+2] = byte(v >> 16) 291 b[n+3] = byte(v >> 24) 292 genPerm(h, b, s, n+4) 293 } 294 } 295 296 type key interface { 297 clear() // set bits all to 0 298 random(r *rand.Rand) // set key to something random 299 bits() int // how many bits key has 300 flipBit(i int) // flip bit i of the key 301 hash() uint64 // hash the key 302 name() string // for error reporting 303 } 304 305 type bytesKey struct { 306 b []byte 307 } 308 309 func (k *bytesKey) clear() { 310 for i := range k.b { 311 k.b[i] = 0 312 } 313 } 314 func (k *bytesKey) random(r *rand.Rand) { 315 randBytes(r, k.b) 316 } 317 func (k *bytesKey) bits() int { 318 return len(k.b) * 8 319 } 320 func (k *bytesKey) flipBit(i int) { 321 k.b[i>>3] ^= byte(1 << uint(i&7)) 322 } 323 func (k *bytesKey) hash() uint64 { 324 return bytesHash(k.b) 325 } 326 func (k *bytesKey) name() string { 327 return fmt.Sprintf("bytes%d", len(k.b)) 328 } 329 330 // Flipping a single bit of a key should flip each output bit with 50% probability. 331 func TestSmhasherAvalanche(t *testing.T) { 332 if runtime.GOARCH == "wasm" { 333 t.Skip("Too slow on wasm") 334 } 335 if testing.Short() { 336 t.Skip("Skipping in short mode") 337 } 338 avalancheTest1(t, &bytesKey{make([]byte, 2)}) 339 avalancheTest1(t, &bytesKey{make([]byte, 4)}) 340 avalancheTest1(t, &bytesKey{make([]byte, 8)}) 341 avalancheTest1(t, &bytesKey{make([]byte, 16)}) 342 avalancheTest1(t, &bytesKey{make([]byte, 32)}) 343 avalancheTest1(t, &bytesKey{make([]byte, 200)}) 344 } 345 func avalancheTest1(t *testing.T, k key) { 346 const REP = 100000 347 r := rand.New(rand.NewSource(1234)) 348 n := k.bits() 349 350 // grid[i][j] is a count of whether flipping 351 // input bit i affects output bit j. 352 grid := make([][hashSize]int, n) 353 354 for z := 0; z < REP; z++ { 355 // pick a random key, hash it 356 k.random(r) 357 h := k.hash() 358 359 // flip each bit, hash & compare the results 360 for i := 0; i < n; i++ { 361 k.flipBit(i) 362 d := h ^ k.hash() 363 k.flipBit(i) 364 365 // record the effects of that bit flip 366 g := &grid[i] 367 for j := 0; j < hashSize; j++ { 368 g[j] += int(d & 1) 369 d >>= 1 370 } 371 } 372 } 373 374 // Each entry in the grid should be about REP/2. 375 // More precisely, we did N = k.bits() * hashSize experiments where 376 // each is the sum of REP coin flips. We want to find bounds on the 377 // sum of coin flips such that a truly random experiment would have 378 // all sums inside those bounds with 99% probability. 379 N := n * hashSize 380 var c float64 381 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 382 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 383 } 384 c *= 11.0 // allowed slack: 40% to 60% - we don't need to be perfectly random 385 mean := .5 * REP 386 stddev := .5 * math.Sqrt(REP) 387 low := int(mean - c*stddev) 388 high := int(mean + c*stddev) 389 for i := 0; i < n; i++ { 390 for j := 0; j < hashSize; j++ { 391 x := grid[i][j] 392 if x < low || x > high { 393 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 394 } 395 } 396 } 397 } 398 399 // All bit rotations of a set of distinct keys 400 func TestSmhasherWindowed(t *testing.T) { 401 windowed(t, &bytesKey{make([]byte, 128)}) 402 } 403 func windowed(t *testing.T, k key) { 404 if runtime.GOARCH == "wasm" { 405 t.Skip("Too slow on wasm") 406 } 407 if testing.Short() { 408 t.Skip("Skipping in short mode") 409 } 410 const BITS = 16 411 412 for r := 0; r < k.bits(); r++ { 413 h := newHashSet() 414 for i := 0; i < 1<<BITS; i++ { 415 k.clear() 416 for j := 0; j < BITS; j++ { 417 if i>>uint(j)&1 != 0 { 418 k.flipBit((j + r) % k.bits()) 419 } 420 } 421 h.add(k.hash()) 422 } 423 h.check(t) 424 } 425 } 426 427 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 428 func TestSmhasherText(t *testing.T) { 429 if testing.Short() { 430 t.Skip("Skipping in short mode") 431 } 432 text(t, "Foo", "Bar") 433 text(t, "FooBar", "") 434 text(t, "", "FooBar") 435 } 436 func text(t *testing.T, prefix, suffix string) { 437 const N = 4 438 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 439 const L = len(S) 440 b := make([]byte, len(prefix)+N+len(suffix)) 441 copy(b, prefix) 442 copy(b[len(prefix)+N:], suffix) 443 h := newHashSet() 444 c := b[len(prefix):] 445 for i := 0; i < L; i++ { 446 c[0] = S[i] 447 for j := 0; j < L; j++ { 448 c[1] = S[j] 449 for k := 0; k < L; k++ { 450 c[2] = S[k] 451 for x := 0; x < L; x++ { 452 c[3] = S[x] 453 h.addB(b) 454 } 455 } 456 } 457 } 458 h.check(t) 459 } 460 461 // Make sure different seed values generate different hashes. 462 func TestSmhasherSeed(t *testing.T) { 463 if unsafe.Sizeof(uintptr(0)) == 4 { 464 t.Skip("32-bit platforms don't have ideal seed-input distributions (see issue 33988)") 465 } 466 h := newHashSet() 467 const N = 100000 468 s := "hello" 469 for i := 0; i < N; i++ { 470 h.addS_seed(s, Seed{s: uint64(i + 1)}) 471 h.addS_seed(s, Seed{s: uint64(i+1) << 32}) // make sure high bits are used 472 } 473 h.check(t) 474 }