github.com/ader1990/go@v0.0.0-20140630135419-8c24447fa791/src/pkg/runtime/hash_test.go (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"math/rand"
    11  	. "runtime"
    12  	"strings"
    13  	"testing"
    14  )
    15  
    16  // Smhasher is a torture test for hash functions.
    17  // https://code.google.com/p/smhasher/
    18  // This code is a port of some of the Smhasher tests to Go.
    19  //
    20  // The current AES hash function passes Smhasher.  Our fallback
    21  // hash functions don't, so we only enable the difficult tests when
    22  // we know the AES implementation is available.
    23  
    24  // Sanity checks.
    25  // hash should not depend on values outside key.
    26  // hash should not depend on alignment.
    27  func TestSmhasherSanity(t *testing.T) {
    28  	r := rand.New(rand.NewSource(1234))
    29  	const REP = 10
    30  	const KEYMAX = 128
    31  	const PAD = 16
    32  	const OFFMAX = 16
    33  	for k := 0; k < REP; k++ {
    34  		for n := 0; n < KEYMAX; n++ {
    35  			for i := 0; i < OFFMAX; i++ {
    36  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    37  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    38  				randBytes(r, b[:])
    39  				randBytes(r, c[:])
    40  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    41  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    42  					t.Errorf("hash depends on bytes outside key")
    43  				}
    44  			}
    45  		}
    46  	}
    47  }
    48  
    49  type HashSet struct {
    50  	m map[uintptr]struct{} // set of hashes added
    51  	n int                  // number of hashes added
    52  }
    53  
    54  func newHashSet() *HashSet {
    55  	return &HashSet{make(map[uintptr]struct{}), 0}
    56  }
    57  func (s *HashSet) add(h uintptr) {
    58  	s.m[h] = struct{}{}
    59  	s.n++
    60  }
    61  func (s *HashSet) addS(x string) {
    62  	s.add(StringHash(x, 0))
    63  }
    64  func (s *HashSet) addB(x []byte) {
    65  	s.add(BytesHash(x, 0))
    66  }
    67  func (s *HashSet) addS_seed(x string, seed uintptr) {
    68  	s.add(StringHash(x, seed))
    69  }
    70  func (s *HashSet) check(t *testing.T) {
    71  	const SLOP = 10.0
    72  	collisions := s.n - len(s.m)
    73  	//fmt.Printf("%d/%d\n", len(s.m), s.n)
    74  	pairs := int64(s.n) * int64(s.n-1) / 2
    75  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
    76  	stddev := math.Sqrt(expected)
    77  	if float64(collisions) > expected+SLOP*3*stddev {
    78  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
    79  	}
    80  }
    81  
    82  // a string plus adding zeros must make distinct hashes
    83  func TestSmhasherAppendedZeros(t *testing.T) {
    84  	s := "hello" + strings.Repeat("\x00", 256)
    85  	h := newHashSet()
    86  	for i := 0; i <= len(s); i++ {
    87  		h.addS(s[:i])
    88  	}
    89  	h.check(t)
    90  }
    91  
    92  // All 0-3 byte strings have distinct hashes.
    93  func TestSmhasherSmallKeys(t *testing.T) {
    94  	h := newHashSet()
    95  	var b [3]byte
    96  	for i := 0; i < 256; i++ {
    97  		b[0] = byte(i)
    98  		h.addB(b[:1])
    99  		for j := 0; j < 256; j++ {
   100  			b[1] = byte(j)
   101  			h.addB(b[:2])
   102  			if !testing.Short() {
   103  				for k := 0; k < 256; k++ {
   104  					b[2] = byte(k)
   105  					h.addB(b[:3])
   106  				}
   107  			}
   108  		}
   109  	}
   110  	h.check(t)
   111  }
   112  
   113  // Different length strings of all zeros have distinct hashes.
   114  func TestSmhasherZeros(t *testing.T) {
   115  	N := 256 * 1024
   116  	if testing.Short() {
   117  		N = 1024
   118  	}
   119  	h := newHashSet()
   120  	b := make([]byte, N)
   121  	for i := 0; i <= N; i++ {
   122  		h.addB(b[:i])
   123  	}
   124  	h.check(t)
   125  }
   126  
   127  // Strings with up to two nonzero bytes all have distinct hashes.
   128  func TestSmhasherTwoNonzero(t *testing.T) {
   129  	if testing.Short() {
   130  		t.Skip("Skipping in short mode")
   131  	}
   132  	h := newHashSet()
   133  	for n := 2; n <= 16; n++ {
   134  		twoNonZero(h, n)
   135  	}
   136  	h.check(t)
   137  }
   138  func twoNonZero(h *HashSet, n int) {
   139  	b := make([]byte, n)
   140  
   141  	// all zero
   142  	h.addB(b[:])
   143  
   144  	// one non-zero byte
   145  	for i := 0; i < n; i++ {
   146  		for x := 1; x < 256; x++ {
   147  			b[i] = byte(x)
   148  			h.addB(b[:])
   149  			b[i] = 0
   150  		}
   151  	}
   152  
   153  	// two non-zero bytes
   154  	for i := 0; i < n; i++ {
   155  		for x := 1; x < 256; x++ {
   156  			b[i] = byte(x)
   157  			for j := i + 1; j < n; j++ {
   158  				for y := 1; y < 256; y++ {
   159  					b[j] = byte(y)
   160  					h.addB(b[:])
   161  					b[j] = 0
   162  				}
   163  			}
   164  			b[i] = 0
   165  		}
   166  	}
   167  }
   168  
   169  // Test strings with repeats, like "abcdabcdabcdabcd..."
   170  func TestSmhasherCyclic(t *testing.T) {
   171  	if testing.Short() {
   172  		t.Skip("Skipping in short mode")
   173  	}
   174  	if !HaveGoodHash() {
   175  		t.Skip("fallback hash not good enough for this test")
   176  	}
   177  	r := rand.New(rand.NewSource(1234))
   178  	const REPEAT = 8
   179  	const N = 1000000
   180  	for n := 4; n <= 12; n++ {
   181  		h := newHashSet()
   182  		b := make([]byte, REPEAT*n)
   183  		for i := 0; i < N; i++ {
   184  			b[0] = byte(i * 79 % 97)
   185  			b[1] = byte(i * 43 % 137)
   186  			b[2] = byte(i * 151 % 197)
   187  			b[3] = byte(i * 199 % 251)
   188  			randBytes(r, b[4:n])
   189  			for j := n; j < n*REPEAT; j++ {
   190  				b[j] = b[j-n]
   191  			}
   192  			h.addB(b)
   193  		}
   194  		h.check(t)
   195  	}
   196  }
   197  
   198  // Test strings with only a few bits set
   199  func TestSmhasherSparse(t *testing.T) {
   200  	if testing.Short() {
   201  		t.Skip("Skipping in short mode")
   202  	}
   203  	sparse(t, 32, 6)
   204  	sparse(t, 40, 6)
   205  	sparse(t, 48, 5)
   206  	sparse(t, 56, 5)
   207  	sparse(t, 64, 5)
   208  	sparse(t, 96, 4)
   209  	sparse(t, 256, 3)
   210  	sparse(t, 2048, 2)
   211  }
   212  func sparse(t *testing.T, n int, k int) {
   213  	b := make([]byte, n/8)
   214  	h := newHashSet()
   215  	setbits(h, b, 0, k)
   216  	h.check(t)
   217  }
   218  
   219  // set up to k bits at index i and greater
   220  func setbits(h *HashSet, b []byte, i int, k int) {
   221  	h.addB(b)
   222  	if k == 0 {
   223  		return
   224  	}
   225  	for j := i; j < len(b)*8; j++ {
   226  		b[j/8] |= byte(1 << uint(j&7))
   227  		setbits(h, b, j+1, k-1)
   228  		b[j/8] &= byte(^(1 << uint(j&7)))
   229  	}
   230  }
   231  
   232  // Test all possible combinations of n blocks from the set s.
   233  // "permutation" is a bad name here, but it is what Smhasher uses.
   234  func TestSmhasherPermutation(t *testing.T) {
   235  	if testing.Short() {
   236  		t.Skip("Skipping in short mode")
   237  	}
   238  	if !HaveGoodHash() {
   239  		t.Skip("fallback hash not good enough for this test")
   240  	}
   241  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   242  	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   243  	permutation(t, []uint32{0, 1}, 20)
   244  	permutation(t, []uint32{0, 1 << 31}, 20)
   245  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   246  }
   247  func permutation(t *testing.T, s []uint32, n int) {
   248  	b := make([]byte, n*4)
   249  	h := newHashSet()
   250  	genPerm(h, b, s, 0)
   251  	h.check(t)
   252  }
   253  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   254  	h.addB(b[:n])
   255  	if n == len(b) {
   256  		return
   257  	}
   258  	for _, v := range s {
   259  		b[n] = byte(v)
   260  		b[n+1] = byte(v >> 8)
   261  		b[n+2] = byte(v >> 16)
   262  		b[n+3] = byte(v >> 24)
   263  		genPerm(h, b, s, n+4)
   264  	}
   265  }
   266  
   267  type Key interface {
   268  	clear()              // set bits all to 0
   269  	random(r *rand.Rand) // set key to something random
   270  	bits() int           // how many bits key has
   271  	flipBit(i int)       // flip bit i of the key
   272  	hash() uintptr       // hash the key
   273  	name() string        // for error reporting
   274  }
   275  
   276  type BytesKey struct {
   277  	b []byte
   278  }
   279  
   280  func (k *BytesKey) clear() {
   281  	for i := range k.b {
   282  		k.b[i] = 0
   283  	}
   284  }
   285  func (k *BytesKey) random(r *rand.Rand) {
   286  	randBytes(r, k.b)
   287  }
   288  func (k *BytesKey) bits() int {
   289  	return len(k.b) * 8
   290  }
   291  func (k *BytesKey) flipBit(i int) {
   292  	k.b[i>>3] ^= byte(1 << uint(i&7))
   293  }
   294  func (k *BytesKey) hash() uintptr {
   295  	return BytesHash(k.b, 0)
   296  }
   297  func (k *BytesKey) name() string {
   298  	return fmt.Sprintf("bytes%d", len(k.b))
   299  }
   300  
   301  type Int32Key struct {
   302  	i uint32
   303  }
   304  
   305  func (k *Int32Key) clear() {
   306  	k.i = 0
   307  }
   308  func (k *Int32Key) random(r *rand.Rand) {
   309  	k.i = r.Uint32()
   310  }
   311  func (k *Int32Key) bits() int {
   312  	return 32
   313  }
   314  func (k *Int32Key) flipBit(i int) {
   315  	k.i ^= 1 << uint(i)
   316  }
   317  func (k *Int32Key) hash() uintptr {
   318  	return Int32Hash(k.i, 0)
   319  }
   320  func (k *Int32Key) name() string {
   321  	return "int32"
   322  }
   323  
   324  type Int64Key struct {
   325  	i uint64
   326  }
   327  
   328  func (k *Int64Key) clear() {
   329  	k.i = 0
   330  }
   331  func (k *Int64Key) random(r *rand.Rand) {
   332  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   333  }
   334  func (k *Int64Key) bits() int {
   335  	return 64
   336  }
   337  func (k *Int64Key) flipBit(i int) {
   338  	k.i ^= 1 << uint(i)
   339  }
   340  func (k *Int64Key) hash() uintptr {
   341  	return Int64Hash(k.i, 0)
   342  }
   343  func (k *Int64Key) name() string {
   344  	return "int64"
   345  }
   346  
   347  // Flipping a single bit of a key should flip each output bit with 50% probability.
   348  func TestSmhasherAvalanche(t *testing.T) {
   349  	if !HaveGoodHash() {
   350  		t.Skip("fallback hash not good enough for this test")
   351  	}
   352  	if testing.Short() {
   353  		t.Skip("Skipping in short mode")
   354  	}
   355  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   356  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   357  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   358  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   359  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   360  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   361  	avalancheTest1(t, &Int32Key{})
   362  	avalancheTest1(t, &Int64Key{})
   363  }
   364  func avalancheTest1(t *testing.T, k Key) {
   365  	const REP = 100000
   366  	r := rand.New(rand.NewSource(1234))
   367  	n := k.bits()
   368  
   369  	// grid[i][j] is a count of whether flipping
   370  	// input bit i affects output bit j.
   371  	grid := make([][hashSize]int, n)
   372  
   373  	for z := 0; z < REP; z++ {
   374  		// pick a random key, hash it
   375  		k.random(r)
   376  		h := k.hash()
   377  
   378  		// flip each bit, hash & compare the results
   379  		for i := 0; i < n; i++ {
   380  			k.flipBit(i)
   381  			d := h ^ k.hash()
   382  			k.flipBit(i)
   383  
   384  			// record the effects of that bit flip
   385  			g := &grid[i]
   386  			for j := 0; j < hashSize; j++ {
   387  				g[j] += int(d & 1)
   388  				d >>= 1
   389  			}
   390  		}
   391  	}
   392  
   393  	// Each entry in the grid should be about REP/2.
   394  	// More precisely, we did N = k.bits() * hashSize experiments where
   395  	// each is the sum of REP coin flips.  We want to find bounds on the
   396  	// sum of coin flips such that a truly random experiment would have
   397  	// all sums inside those bounds with 99% probability.
   398  	N := n * hashSize
   399  	var c float64
   400  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   401  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   402  	}
   403  	c *= 4.0 // allowed slack - we don't need to be perfectly random
   404  	mean := .5 * REP
   405  	stddev := .5 * math.Sqrt(REP)
   406  	low := int(mean - c*stddev)
   407  	high := int(mean + c*stddev)
   408  	for i := 0; i < n; i++ {
   409  		for j := 0; j < hashSize; j++ {
   410  			x := grid[i][j]
   411  			if x < low || x > high {
   412  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   413  			}
   414  		}
   415  	}
   416  }
   417  
   418  // All bit rotations of a set of distinct keys
   419  func TestSmhasherWindowed(t *testing.T) {
   420  	windowed(t, &Int32Key{})
   421  	windowed(t, &Int64Key{})
   422  	windowed(t, &BytesKey{make([]byte, 128)})
   423  }
   424  func windowed(t *testing.T, k Key) {
   425  	if testing.Short() {
   426  		t.Skip("Skipping in short mode")
   427  	}
   428  	const BITS = 16
   429  
   430  	for r := 0; r < k.bits(); r++ {
   431  		h := newHashSet()
   432  		for i := 0; i < 1<<BITS; i++ {
   433  			k.clear()
   434  			for j := 0; j < BITS; j++ {
   435  				if i>>uint(j)&1 != 0 {
   436  					k.flipBit((j + r) % k.bits())
   437  				}
   438  			}
   439  			h.add(k.hash())
   440  		}
   441  		h.check(t)
   442  	}
   443  }
   444  
   445  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   446  func TestSmhasherText(t *testing.T) {
   447  	if testing.Short() {
   448  		t.Skip("Skipping in short mode")
   449  	}
   450  	text(t, "Foo", "Bar")
   451  	text(t, "FooBar", "")
   452  	text(t, "", "FooBar")
   453  }
   454  func text(t *testing.T, prefix, suffix string) {
   455  	const N = 4
   456  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   457  	const L = len(S)
   458  	b := make([]byte, len(prefix)+N+len(suffix))
   459  	copy(b, prefix)
   460  	copy(b[len(prefix)+N:], suffix)
   461  	h := newHashSet()
   462  	c := b[len(prefix):]
   463  	for i := 0; i < L; i++ {
   464  		c[0] = S[i]
   465  		for j := 0; j < L; j++ {
   466  			c[1] = S[j]
   467  			for k := 0; k < L; k++ {
   468  				c[2] = S[k]
   469  				for x := 0; x < L; x++ {
   470  					c[3] = S[x]
   471  					h.addB(b)
   472  				}
   473  			}
   474  		}
   475  	}
   476  	h.check(t)
   477  }
   478  
   479  // Make sure different seed values generate different hashes.
   480  func TestSmhasherSeed(t *testing.T) {
   481  	h := newHashSet()
   482  	const N = 100000
   483  	s := "hello"
   484  	for i := 0; i < N; i++ {
   485  		h.addS_seed(s, uintptr(i))
   486  	}
   487  	h.check(t)
   488  }
   489  
   490  // size of the hash output (32 or 64 bits)
   491  const hashSize = 32 + int(^uintptr(0)>>63<<5)
   492  
   493  func randBytes(r *rand.Rand, b []byte) {
   494  	for i := range b {
   495  		b[i] = byte(r.Uint32())
   496  	}
   497  }
   498  
   499  func benchmarkHash(b *testing.B, n int) {
   500  	s := strings.Repeat("A", n)
   501  
   502  	for i := 0; i < b.N; i++ {
   503  		StringHash(s, 0)
   504  	}
   505  	b.SetBytes(int64(n))
   506  }
   507  
   508  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   509  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   510  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   511  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   512  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }