github.com/pidato/unsafe@v0.1.4/memory/hash/smash_test.go (about) 1 package hash 2 3 import ( 4 "fmt" 5 "math" 6 "math/rand" 7 "runtime" 8 "strings" 9 "testing" 10 "unsafe" 11 ) 12 13 // Smhasher is a torture test for hash functions. 14 // https://code.google.com/p/smhasher/ 15 // This code is a port of some of the Smhasher tests to Go. 16 17 var fixedSeed = makeSeed() 18 19 // Sanity checks. 20 // hash should not depend on values outside key. 21 // hash should not depend on alignment. 22 func TestSmhasherSanity(t *testing.T) { 23 r := rand.New(rand.NewSource(1234)) 24 const REP = 10 25 const KEYMAX = 128 26 const PAD = 16 27 const OFFMAX = 16 28 for k := 0; k < REP; k++ { 29 for n := 0; n < KEYMAX; n++ { 30 for i := 0; i < OFFMAX; i++ { 31 var b [KEYMAX + OFFMAX + 2*PAD]byte 32 var c [KEYMAX + OFFMAX + 2*PAD]byte 33 randBytes(r, b[:]) 34 randBytes(r, c[:]) 35 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 36 if bytesHash(b[PAD:PAD+n]) != bytesHash(c[PAD+i:PAD+i+n]) { 37 t.Errorf("hash depends on bytes outside key") 38 } 39 } 40 } 41 } 42 } 43 44 func bytesHash(b []byte) uint64 { 45 return Hash(*(*unsafe.Pointer)(unsafe.Pointer(&b)), uint64(len(b)), DefaultSeed) 46 } 47 func stringHash(s string) uint64 { 48 return String(s) 49 } 50 51 const hashSize = 64 52 53 func randBytes(r *rand.Rand, b []byte) { 54 r.Read(b) // can't fail 55 } 56 57 // A hashSet measures the frequency of hash collisions. 58 type hashSet struct { 59 m map[uint64]struct{} // set of hashes added 60 n int // number of hashes added 61 } 62 63 func newHashSet() *hashSet { 64 return &hashSet{make(map[uint64]struct{}), 0} 65 } 66 func (s *hashSet) add(h uint64) { 67 s.m[h] = struct{}{} 68 s.n++ 69 } 70 func (s *hashSet) addS(x string) { 71 s.add(stringHash(x)) 72 } 73 74 func (s *hashSet) addB(x []byte) { 75 s.add(bytesHash(x)) 76 } 77 78 func (s *hashSet) addS_seed(x string, seed seed) { 79 s.add(Hash(*(*unsafe.Pointer)(unsafe.Pointer(&x)), uint64(len(x)), seed.s)) 80 } 81 82 func (s *hashSet) check(t *testing.T) { 83 const SLOP = 10.0 84 collisions := s.n - len(s.m) 85 pairs := int64(s.n) * int64(s.n-1) / 2 86 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 87 stddev := math.Sqrt(expected) 88 if float64(collisions) > expected+SLOP*(3*stddev+1) { 89 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 90 } 91 } 92 93 // a string plus adding zeros must make distinct hashes 94 func TestSmhasherAppendedZeros(t *testing.T) { 95 s := "hello" + strings.Repeat("\x00", 256) 96 h := newHashSet() 97 for i := 0; i <= len(s); i++ { 98 h.addS(s[:i]) 99 } 100 h.check(t) 101 } 102 103 // All 0-3 byte strings have distinct hashes. 104 func TestSmhasherSmallKeys(t *testing.T) { 105 h := newHashSet() 106 var b [3]byte 107 for i := 0; i < 256; i++ { 108 b[0] = byte(i) 109 h.addB(b[:1]) 110 for j := 0; j < 256; j++ { 111 b[1] = byte(j) 112 h.addB(b[:2]) 113 if !testing.Short() { 114 for k := 0; k < 256; k++ { 115 b[2] = byte(k) 116 h.addB(b[:3]) 117 } 118 } 119 } 120 } 121 h.check(t) 122 } 123 124 // Different length strings of all zeros have distinct hashes. 125 func TestSmhasherZeros(t *testing.T) { 126 N := 256 * 1024 127 if testing.Short() { 128 N = 1024 129 } 130 h := newHashSet() 131 b := make([]byte, N) 132 for i := 0; i <= N; i++ { 133 h.addB(b[:i]) 134 } 135 h.check(t) 136 } 137 138 // Strings with up to two nonzero bytes all have distinct hashes. 139 func TestSmhasherTwoNonzero(t *testing.T) { 140 if runtime.GOARCH == "wasm" { 141 t.Skip("Too slow on wasm") 142 } 143 if testing.Short() { 144 t.Skip("Skipping in short mode") 145 } 146 h := newHashSet() 147 for n := 2; n <= 16; n++ { 148 twoNonZero(h, n) 149 } 150 h.check(t) 151 } 152 func twoNonZero(h *hashSet, n int) { 153 b := make([]byte, n) 154 155 // all zero 156 h.addB(b) 157 158 // one non-zero byte 159 for i := 0; i < n; i++ { 160 for x := 1; x < 256; x++ { 161 b[i] = byte(x) 162 h.addB(b) 163 b[i] = 0 164 } 165 } 166 167 // two non-zero bytes 168 for i := 0; i < n; i++ { 169 for x := 1; x < 256; x++ { 170 b[i] = byte(x) 171 for j := i + 1; j < n; j++ { 172 for y := 1; y < 256; y++ { 173 b[j] = byte(y) 174 h.addB(b) 175 b[j] = 0 176 } 177 } 178 b[i] = 0 179 } 180 } 181 } 182 183 // Test strings with repeats, like "abcdabcdabcdabcd..." 184 func TestSmhasherCyclic(t *testing.T) { 185 if testing.Short() { 186 t.Skip("Skipping in short mode") 187 } 188 r := rand.New(rand.NewSource(1234)) 189 const REPEAT = 8 190 const N = 1000000 191 for n := 4; n <= 12; n++ { 192 h := newHashSet() 193 b := make([]byte, REPEAT*n) 194 for i := 0; i < N; i++ { 195 b[0] = byte(i * 79 % 97) 196 b[1] = byte(i * 43 % 137) 197 b[2] = byte(i * 151 % 197) 198 b[3] = byte(i * 199 % 251) 199 randBytes(r, b[4:n]) 200 for j := n; j < n*REPEAT; j++ { 201 b[j] = b[j-n] 202 } 203 h.addB(b) 204 } 205 h.check(t) 206 } 207 } 208 209 // Test strings with only a few bits set 210 func TestSmhasherSparse(t *testing.T) { 211 if runtime.GOARCH == "wasm" { 212 t.Skip("Too slow on wasm") 213 } 214 if testing.Short() { 215 t.Skip("Skipping in short mode") 216 } 217 sparse(t, 32, 6) 218 sparse(t, 40, 6) 219 sparse(t, 48, 5) 220 sparse(t, 56, 5) 221 sparse(t, 64, 5) 222 sparse(t, 96, 4) 223 sparse(t, 256, 3) 224 sparse(t, 2048, 2) 225 } 226 func sparse(t *testing.T, n int, k int) { 227 b := make([]byte, n/8) 228 h := newHashSet() 229 setbits(h, b, 0, k) 230 h.check(t) 231 } 232 233 // set up to k bits at index i and greater 234 func setbits(h *hashSet, b []byte, i int, k int) { 235 h.addB(b) 236 if k == 0 { 237 return 238 } 239 for j := i; j < len(b)*8; j++ { 240 b[j/8] |= byte(1 << uint(j&7)) 241 setbits(h, b, j+1, k-1) 242 b[j/8] &= byte(^(1 << uint(j&7))) 243 } 244 } 245 246 // Test all possible combinations of n blocks from the set s. 247 // "permutation" is a bad name here, but it is what Smhasher uses. 248 func TestSmhasherPermutation(t *testing.T) { 249 if runtime.GOARCH == "wasm" { 250 t.Skip("Too slow on wasm") 251 } 252 if testing.Short() { 253 t.Skip("Skipping in short mode") 254 } 255 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 256 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 257 permutation(t, []uint32{0, 1}, 20) 258 permutation(t, []uint32{0, 1 << 31}, 20) 259 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 260 } 261 func permutation(t *testing.T, s []uint32, n int) { 262 b := make([]byte, n*4) 263 h := newHashSet() 264 genPerm(h, b, s, 0) 265 h.check(t) 266 } 267 func genPerm(h *hashSet, b []byte, s []uint32, n int) { 268 h.addB(b[:n]) 269 if n == len(b) { 270 return 271 } 272 for _, v := range s { 273 b[n] = byte(v) 274 b[n+1] = byte(v >> 8) 275 b[n+2] = byte(v >> 16) 276 b[n+3] = byte(v >> 24) 277 genPerm(h, b, s, n+4) 278 } 279 } 280 281 type key interface { 282 clear() // set bits all to 0 283 random(r *rand.Rand) // set key to something random 284 bits() int // how many bits key has 285 flipBit(i int) // flip bit i of the key 286 hash() uint64 // hash the key 287 name() string // for error reporting 288 } 289 290 type bytesKey struct { 291 b []byte 292 } 293 294 func (k *bytesKey) clear() { 295 for i := range k.b { 296 k.b[i] = 0 297 } 298 } 299 func (k *bytesKey) random(r *rand.Rand) { 300 randBytes(r, k.b) 301 } 302 func (k *bytesKey) bits() int { 303 return len(k.b) * 8 304 } 305 func (k *bytesKey) flipBit(i int) { 306 k.b[i>>3] ^= byte(1 << uint(i&7)) 307 } 308 func (k *bytesKey) hash() uint64 { 309 return bytesHash(k.b) 310 } 311 func (k *bytesKey) name() string { 312 return fmt.Sprintf("bytes%d", len(k.b)) 313 } 314 315 // Flipping a single bit of a key should flip each output bit with 50% probability. 316 func TestSmhasherAvalanche(t *testing.T) { 317 if runtime.GOARCH == "wasm" { 318 t.Skip("Too slow on wasm") 319 } 320 if testing.Short() { 321 t.Skip("Skipping in short mode") 322 } 323 avalancheTest1(t, &bytesKey{make([]byte, 2)}) 324 avalancheTest1(t, &bytesKey{make([]byte, 4)}) 325 avalancheTest1(t, &bytesKey{make([]byte, 8)}) 326 avalancheTest1(t, &bytesKey{make([]byte, 16)}) 327 avalancheTest1(t, &bytesKey{make([]byte, 32)}) 328 avalancheTest1(t, &bytesKey{make([]byte, 200)}) 329 } 330 func avalancheTest1(t *testing.T, k key) { 331 const REP = 100000 332 r := rand.New(rand.NewSource(1234)) 333 n := k.bits() 334 335 // grid[i][j] is a count of whether flipping 336 // input bit i affects output bit j. 337 grid := make([][hashSize]int, n) 338 339 for z := 0; z < REP; z++ { 340 // pick a random key, hash it 341 k.random(r) 342 h := k.hash() 343 344 // flip each bit, hash & compare the results 345 for i := 0; i < n; i++ { 346 k.flipBit(i) 347 d := h ^ k.hash() 348 k.flipBit(i) 349 350 // record the effects of that bit flip 351 g := &grid[i] 352 for j := 0; j < hashSize; j++ { 353 g[j] += int(d & 1) 354 d >>= 1 355 } 356 } 357 } 358 359 // Each entry in the grid should be about REP/2. 360 // More precisely, we did N = k.bits() * hashSize experiments where 361 // each is the sum of REP coin flips. We want to find bounds on the 362 // sum of coin flips such that a truly random experiment would have 363 // all sums inside those bounds with 99% probability. 364 N := n * hashSize 365 var c float64 366 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 367 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 368 } 369 c *= 4.0 // allowed slack - we don't need to be perfectly random 370 mean := .5 * REP 371 stddev := .5 * math.Sqrt(REP) 372 low := int(mean - c*stddev) 373 high := int(mean + c*stddev) 374 for i := 0; i < n; i++ { 375 for j := 0; j < hashSize; j++ { 376 x := grid[i][j] 377 if x < low || x > high { 378 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 379 } 380 } 381 } 382 } 383 384 // All bit rotations of a set of distinct keys 385 func TestSmhasherWindowed(t *testing.T) { 386 windowed(t, &bytesKey{make([]byte, 128)}) 387 } 388 func windowed(t *testing.T, k key) { 389 if runtime.GOARCH == "wasm" { 390 t.Skip("Too slow on wasm") 391 } 392 if testing.Short() { 393 t.Skip("Skipping in short mode") 394 } 395 const BITS = 16 396 397 for r := 0; r < k.bits(); r++ { 398 h := newHashSet() 399 for i := 0; i < 1<<BITS; i++ { 400 k.clear() 401 for j := 0; j < BITS; j++ { 402 if i>>uint(j)&1 != 0 { 403 k.flipBit((j + r) % k.bits()) 404 } 405 } 406 h.add(k.hash()) 407 } 408 h.check(t) 409 } 410 } 411 412 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 413 func TestSmhasherText(t *testing.T) { 414 if testing.Short() { 415 t.Skip("Skipping in short mode") 416 } 417 text(t, "Foo", "Bar") 418 text(t, "FooBar", "") 419 text(t, "", "FooBar") 420 } 421 func text(t *testing.T, prefix, suffix string) { 422 const N = 4 423 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 424 const L = len(S) 425 b := make([]byte, len(prefix)+N+len(suffix)) 426 copy(b, prefix) 427 copy(b[len(prefix)+N:], suffix) 428 h := newHashSet() 429 c := b[len(prefix):] 430 for i := 0; i < L; i++ { 431 c[0] = S[i] 432 for j := 0; j < L; j++ { 433 c[1] = S[j] 434 for k := 0; k < L; k++ { 435 c[2] = S[k] 436 for x := 0; x < L; x++ { 437 c[3] = S[x] 438 h.addB(b) 439 } 440 } 441 } 442 } 443 h.check(t) 444 } 445 446 // Make sure different seed values generate different hashes. 447 func TestSmhasherSeed(t *testing.T) { 448 if unsafe.Sizeof(uintptr(0)) == 4 { 449 t.Skip("32-bit platforms don't have ideal seed-input distributions (see issue 33988)") 450 } 451 h := newHashSet() 452 const N = 100000 453 s := "hello" 454 for i := 0; i < N; i++ { 455 h.addS_seed(s, seed{s: uint64(i + 1)}) 456 h.addS_seed(s, seed{s: uint64(i+1) << 32}) // make sure high bits are used 457 } 458 h.check(t) 459 } 460 461 type seed struct { 462 s uint64 463 } 464 465 //go:linkname runtime_fastrand runtime.fastrand 466 func runtime_fastrand() uint32 467 468 // makeSeed returns a new random seed. 469 func makeSeed() seed { 470 var s1, s2 uint64 471 for { 472 s1 = uint64(runtime_fastrand()) 473 s2 = uint64(runtime_fastrand()) 474 // We use seed 0 to indicate an uninitialized seed/hash, 475 // so keep trying until we get a non-zero seed. 476 if s1|s2 != 0 { 477 break 478 } 479 } 480 return seed{s: s1<<32 + s2} 481 }