github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/runtime/hash_test.go

github.com/zach-klippenstein/go@v0.0.0-20150108044943-fcfbeb3adf58/src/runtime/hash_test.go (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"math/rand"
    11  	. "runtime"
    12  	"strings"
    13  	"testing"
    14  )
    15  
    16  // Smhasher is a torture test for hash functions.
    17  // https://code.google.com/p/smhasher/
    18  // This code is a port of some of the Smhasher tests to Go.
    19  //
    20  // The current AES hash function passes Smhasher.  Our fallback
    21  // hash functions don't, so we only enable the difficult tests when
    22  // we know the AES implementation is available.
    23  
    24  // Sanity checks.
    25  // hash should not depend on values outside key.
    26  // hash should not depend on alignment.
    27  func TestSmhasherSanity(t *testing.T) {
    28  	r := rand.New(rand.NewSource(1234))
    29  	const REP = 10
    30  	const KEYMAX = 128
    31  	const PAD = 16
    32  	const OFFMAX = 16
    33  	for k := 0; k < REP; k++ {
    34  		for n := 0; n < KEYMAX; n++ {
    35  			for i := 0; i < OFFMAX; i++ {
    36  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    37  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    38  				randBytes(r, b[:])
    39  				randBytes(r, c[:])
    40  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    41  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    42  					t.Errorf("hash depends on bytes outside key")
    43  				}
    44  			}
    45  		}
    46  	}
    47  }
    48  
    49  type HashSet struct {
    50  	m map[uintptr]struct{} // set of hashes added
    51  	n int                  // number of hashes added
    52  }
    53  
    54  func newHashSet() *HashSet {
    55  	return &HashSet{make(map[uintptr]struct{}), 0}
    56  }
    57  func (s *HashSet) add(h uintptr) {
    58  	s.m[h] = struct{}{}
    59  	s.n++
    60  }
    61  func (s *HashSet) addS(x string) {
    62  	s.add(StringHash(x, 0))
    63  }
    64  func (s *HashSet) addB(x []byte) {
    65  	s.add(BytesHash(x, 0))
    66  }
    67  func (s *HashSet) addS_seed(x string, seed uintptr) {
    68  	s.add(StringHash(x, seed))
    69  }
    70  func (s *HashSet) check(t *testing.T) {
    71  	const SLOP = 10.0
    72  	collisions := s.n - len(s.m)
    73  	//fmt.Printf("%d/%d\n", len(s.m), s.n)
    74  	pairs := int64(s.n) * int64(s.n-1) / 2
    75  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
    76  	stddev := math.Sqrt(expected)
    77  	if float64(collisions) > expected+SLOP*3*stddev {
    78  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
    79  	}
    80  }
    81  
    82  // a string plus adding zeros must make distinct hashes
    83  func TestSmhasherAppendedZeros(t *testing.T) {
    84  	s := "hello" + strings.Repeat("\x00", 256)
    85  	h := newHashSet()
    86  	for i := 0; i <= len(s); i++ {
    87  		h.addS(s[:i])
    88  	}
    89  	h.check(t)
    90  }
    91  
    92  // All 0-3 byte strings have distinct hashes.
    93  func TestSmhasherSmallKeys(t *testing.T) {
    94  	h := newHashSet()
    95  	var b [3]byte
    96  	for i := 0; i < 256; i++ {
    97  		b[0] = byte(i)
    98  		h.addB(b[:1])
    99  		for j := 0; j < 256; j++ {
   100  			b[1] = byte(j)
   101  			h.addB(b[:2])
   102  			if !testing.Short() {
   103  				for k := 0; k < 256; k++ {
   104  					b[2] = byte(k)
   105  					h.addB(b[:3])
   106  				}
   107  			}
   108  		}
   109  	}
   110  	h.check(t)
   111  }
   112  
   113  // Different length strings of all zeros have distinct hashes.
   114  func TestSmhasherZeros(t *testing.T) {
   115  	N := 256 * 1024
   116  	if testing.Short() {
   117  		N = 1024
   118  	}
   119  	h := newHashSet()
   120  	b := make([]byte, N)
   121  	for i := 0; i <= N; i++ {
   122  		h.addB(b[:i])
   123  	}
   124  	h.check(t)
   125  }
   126  
   127  // Strings with up to two nonzero bytes all have distinct hashes.
   128  func TestSmhasherTwoNonzero(t *testing.T) {
   129  	if testing.Short() {
   130  		t.Skip("Skipping in short mode")
   131  	}
   132  	h := newHashSet()
   133  	for n := 2; n <= 16; n++ {
   134  		twoNonZero(h, n)
   135  	}
   136  	h.check(t)
   137  }
   138  func twoNonZero(h *HashSet, n int) {
   139  	b := make([]byte, n)
   140  
   141  	// all zero
   142  	h.addB(b[:])
   143  
   144  	// one non-zero byte
   145  	for i := 0; i < n; i++ {
   146  		for x := 1; x < 256; x++ {
   147  			b[i] = byte(x)
   148  			h.addB(b[:])
   149  			b[i] = 0
   150  		}
   151  	}
   152  
   153  	// two non-zero bytes
   154  	for i := 0; i < n; i++ {
   155  		for x := 1; x < 256; x++ {
   156  			b[i] = byte(x)
   157  			for j := i + 1; j < n; j++ {
   158  				for y := 1; y < 256; y++ {
   159  					b[j] = byte(y)
   160  					h.addB(b[:])
   161  					b[j] = 0
   162  				}
   163  			}
   164  			b[i] = 0
   165  		}
   166  	}
   167  }
   168  
   169  // Test strings with repeats, like "abcdabcdabcdabcd..."
   170  func TestSmhasherCyclic(t *testing.T) {
   171  	if testing.Short() {
   172  		t.Skip("Skipping in short mode")
   173  	}
   174  	r := rand.New(rand.NewSource(1234))
   175  	const REPEAT = 8
   176  	const N = 1000000
   177  	for n := 4; n <= 12; n++ {
   178  		h := newHashSet()
   179  		b := make([]byte, REPEAT*n)
   180  		for i := 0; i < N; i++ {
   181  			b[0] = byte(i * 79 % 97)
   182  			b[1] = byte(i * 43 % 137)
   183  			b[2] = byte(i * 151 % 197)
   184  			b[3] = byte(i * 199 % 251)
   185  			randBytes(r, b[4:n])
   186  			for j := n; j < n*REPEAT; j++ {
   187  				b[j] = b[j-n]
   188  			}
   189  			h.addB(b)
   190  		}
   191  		h.check(t)
   192  	}
   193  }
   194  
   195  // Test strings with only a few bits set
   196  func TestSmhasherSparse(t *testing.T) {
   197  	if testing.Short() {
   198  		t.Skip("Skipping in short mode")
   199  	}
   200  	sparse(t, 32, 6)
   201  	sparse(t, 40, 6)
   202  	sparse(t, 48, 5)
   203  	sparse(t, 56, 5)
   204  	sparse(t, 64, 5)
   205  	sparse(t, 96, 4)
   206  	sparse(t, 256, 3)
   207  	sparse(t, 2048, 2)
   208  }
   209  func sparse(t *testing.T, n int, k int) {
   210  	b := make([]byte, n/8)
   211  	h := newHashSet()
   212  	setbits(h, b, 0, k)
   213  	h.check(t)
   214  }
   215  
   216  // set up to k bits at index i and greater
   217  func setbits(h *HashSet, b []byte, i int, k int) {
   218  	h.addB(b)
   219  	if k == 0 {
   220  		return
   221  	}
   222  	for j := i; j < len(b)*8; j++ {
   223  		b[j/8] |= byte(1 << uint(j&7))
   224  		setbits(h, b, j+1, k-1)
   225  		b[j/8] &= byte(^(1 << uint(j&7)))
   226  	}
   227  }
   228  
   229  // Test all possible combinations of n blocks from the set s.
   230  // "permutation" is a bad name here, but it is what Smhasher uses.
   231  func TestSmhasherPermutation(t *testing.T) {
   232  	if testing.Short() {
   233  		t.Skip("Skipping in short mode")
   234  	}
   235  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   236  	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   237  	permutation(t, []uint32{0, 1}, 20)
   238  	permutation(t, []uint32{0, 1 << 31}, 20)
   239  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   240  }
   241  func permutation(t *testing.T, s []uint32, n int) {
   242  	b := make([]byte, n*4)
   243  	h := newHashSet()
   244  	genPerm(h, b, s, 0)
   245  	h.check(t)
   246  }
   247  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   248  	h.addB(b[:n])
   249  	if n == len(b) {
   250  		return
   251  	}
   252  	for _, v := range s {
   253  		b[n] = byte(v)
   254  		b[n+1] = byte(v >> 8)
   255  		b[n+2] = byte(v >> 16)
   256  		b[n+3] = byte(v >> 24)
   257  		genPerm(h, b, s, n+4)
   258  	}
   259  }
   260  
   261  type Key interface {
   262  	clear()              // set bits all to 0
   263  	random(r *rand.Rand) // set key to something random
   264  	bits() int           // how many bits key has
   265  	flipBit(i int)       // flip bit i of the key
   266  	hash() uintptr       // hash the key
   267  	name() string        // for error reporting
   268  }
   269  
   270  type BytesKey struct {
   271  	b []byte
   272  }
   273  
   274  func (k *BytesKey) clear() {
   275  	for i := range k.b {
   276  		k.b[i] = 0
   277  	}
   278  }
   279  func (k *BytesKey) random(r *rand.Rand) {
   280  	randBytes(r, k.b)
   281  }
   282  func (k *BytesKey) bits() int {
   283  	return len(k.b) * 8
   284  }
   285  func (k *BytesKey) flipBit(i int) {
   286  	k.b[i>>3] ^= byte(1 << uint(i&7))
   287  }
   288  func (k *BytesKey) hash() uintptr {
   289  	return BytesHash(k.b, 0)
   290  }
   291  func (k *BytesKey) name() string {
   292  	return fmt.Sprintf("bytes%d", len(k.b))
   293  }
   294  
   295  type Int32Key struct {
   296  	i uint32
   297  }
   298  
   299  func (k *Int32Key) clear() {
   300  	k.i = 0
   301  }
   302  func (k *Int32Key) random(r *rand.Rand) {
   303  	k.i = r.Uint32()
   304  }
   305  func (k *Int32Key) bits() int {
   306  	return 32
   307  }
   308  func (k *Int32Key) flipBit(i int) {
   309  	k.i ^= 1 << uint(i)
   310  }
   311  func (k *Int32Key) hash() uintptr {
   312  	return Int32Hash(k.i, 0)
   313  }
   314  func (k *Int32Key) name() string {
   315  	return "int32"
   316  }
   317  
   318  type Int64Key struct {
   319  	i uint64
   320  }
   321  
   322  func (k *Int64Key) clear() {
   323  	k.i = 0
   324  }
   325  func (k *Int64Key) random(r *rand.Rand) {
   326  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   327  }
   328  func (k *Int64Key) bits() int {
   329  	return 64
   330  }
   331  func (k *Int64Key) flipBit(i int) {
   332  	k.i ^= 1 << uint(i)
   333  }
   334  func (k *Int64Key) hash() uintptr {
   335  	return Int64Hash(k.i, 0)
   336  }
   337  func (k *Int64Key) name() string {
   338  	return "int64"
   339  }
   340  
   341  type EfaceKey struct {
   342  	i interface{}
   343  }
   344  
   345  func (k *EfaceKey) clear() {
   346  	k.i = nil
   347  }
   348  func (k *EfaceKey) random(r *rand.Rand) {
   349  	k.i = uint64(r.Int63())
   350  }
   351  func (k *EfaceKey) bits() int {
   352  	// use 64 bits.  This tests inlined interfaces
   353  	// on 64-bit targets and indirect interfaces on
   354  	// 32-bit targets.
   355  	return 64
   356  }
   357  func (k *EfaceKey) flipBit(i int) {
   358  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
   359  }
   360  func (k *EfaceKey) hash() uintptr {
   361  	return EfaceHash(k.i, 0)
   362  }
   363  func (k *EfaceKey) name() string {
   364  	return "Eface"
   365  }
   366  
   367  type IfaceKey struct {
   368  	i interface {
   369  		F()
   370  	}
   371  }
   372  type fInter uint64
   373  
   374  func (x fInter) F() {
   375  }
   376  
   377  func (k *IfaceKey) clear() {
   378  	k.i = nil
   379  }
   380  func (k *IfaceKey) random(r *rand.Rand) {
   381  	k.i = fInter(r.Int63())
   382  }
   383  func (k *IfaceKey) bits() int {
   384  	// use 64 bits.  This tests inlined interfaces
   385  	// on 64-bit targets and indirect interfaces on
   386  	// 32-bit targets.
   387  	return 64
   388  }
   389  func (k *IfaceKey) flipBit(i int) {
   390  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
   391  }
   392  func (k *IfaceKey) hash() uintptr {
   393  	return IfaceHash(k.i, 0)
   394  }
   395  func (k *IfaceKey) name() string {
   396  	return "Iface"
   397  }
   398  
   399  // Flipping a single bit of a key should flip each output bit with 50% probability.
   400  func TestSmhasherAvalanche(t *testing.T) {
   401  	if testing.Short() {
   402  		t.Skip("Skipping in short mode")
   403  	}
   404  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   405  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   406  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   407  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   408  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   409  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   410  	avalancheTest1(t, &Int32Key{})
   411  	avalancheTest1(t, &Int64Key{})
   412  	avalancheTest1(t, &EfaceKey{})
   413  	avalancheTest1(t, &IfaceKey{})
   414  }
   415  func avalancheTest1(t *testing.T, k Key) {
   416  	const REP = 100000
   417  	r := rand.New(rand.NewSource(1234))
   418  	n := k.bits()
   419  
   420  	// grid[i][j] is a count of whether flipping
   421  	// input bit i affects output bit j.
   422  	grid := make([][hashSize]int, n)
   423  
   424  	for z := 0; z < REP; z++ {
   425  		// pick a random key, hash it
   426  		k.random(r)
   427  		h := k.hash()
   428  
   429  		// flip each bit, hash & compare the results
   430  		for i := 0; i < n; i++ {
   431  			k.flipBit(i)
   432  			d := h ^ k.hash()
   433  			k.flipBit(i)
   434  
   435  			// record the effects of that bit flip
   436  			g := &grid[i]
   437  			for j := 0; j < hashSize; j++ {
   438  				g[j] += int(d & 1)
   439  				d >>= 1
   440  			}
   441  		}
   442  	}
   443  
   444  	// Each entry in the grid should be about REP/2.
   445  	// More precisely, we did N = k.bits() * hashSize experiments where
   446  	// each is the sum of REP coin flips.  We want to find bounds on the
   447  	// sum of coin flips such that a truly random experiment would have
   448  	// all sums inside those bounds with 99% probability.
   449  	N := n * hashSize
   450  	var c float64
   451  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   452  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   453  	}
   454  	c *= 4.0 // allowed slack - we don't need to be perfectly random
   455  	mean := .5 * REP
   456  	stddev := .5 * math.Sqrt(REP)
   457  	low := int(mean - c*stddev)
   458  	high := int(mean + c*stddev)
   459  	for i := 0; i < n; i++ {
   460  		for j := 0; j < hashSize; j++ {
   461  			x := grid[i][j]
   462  			if x < low || x > high {
   463  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   464  			}
   465  		}
   466  	}
   467  }
   468  
   469  // All bit rotations of a set of distinct keys
   470  func TestSmhasherWindowed(t *testing.T) {
   471  	windowed(t, &Int32Key{})
   472  	windowed(t, &Int64Key{})
   473  	windowed(t, &BytesKey{make([]byte, 128)})
   474  }
   475  func windowed(t *testing.T, k Key) {
   476  	if testing.Short() {
   477  		t.Skip("Skipping in short mode")
   478  	}
   479  	const BITS = 16
   480  
   481  	for r := 0; r < k.bits(); r++ {
   482  		h := newHashSet()
   483  		for i := 0; i < 1<<BITS; i++ {
   484  			k.clear()
   485  			for j := 0; j < BITS; j++ {
   486  				if i>>uint(j)&1 != 0 {
   487  					k.flipBit((j + r) % k.bits())
   488  				}
   489  			}
   490  			h.add(k.hash())
   491  		}
   492  		h.check(t)
   493  	}
   494  }
   495  
   496  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   497  func TestSmhasherText(t *testing.T) {
   498  	if testing.Short() {
   499  		t.Skip("Skipping in short mode")
   500  	}
   501  	text(t, "Foo", "Bar")
   502  	text(t, "FooBar", "")
   503  	text(t, "", "FooBar")
   504  }
   505  func text(t *testing.T, prefix, suffix string) {
   506  	const N = 4
   507  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   508  	const L = len(S)
   509  	b := make([]byte, len(prefix)+N+len(suffix))
   510  	copy(b, prefix)
   511  	copy(b[len(prefix)+N:], suffix)
   512  	h := newHashSet()
   513  	c := b[len(prefix):]
   514  	for i := 0; i < L; i++ {
   515  		c[0] = S[i]
   516  		for j := 0; j < L; j++ {
   517  			c[1] = S[j]
   518  			for k := 0; k < L; k++ {
   519  				c[2] = S[k]
   520  				for x := 0; x < L; x++ {
   521  					c[3] = S[x]
   522  					h.addB(b)
   523  				}
   524  			}
   525  		}
   526  	}
   527  	h.check(t)
   528  }
   529  
   530  // Make sure different seed values generate different hashes.
   531  func TestSmhasherSeed(t *testing.T) {
   532  	h := newHashSet()
   533  	const N = 100000
   534  	s := "hello"
   535  	for i := 0; i < N; i++ {
   536  		h.addS_seed(s, uintptr(i))
   537  	}
   538  	h.check(t)
   539  }
   540  
   541  // size of the hash output (32 or 64 bits)
   542  const hashSize = 32 + int(^uintptr(0)>>63<<5)
   543  
   544  func randBytes(r *rand.Rand, b []byte) {
   545  	for i := range b {
   546  		b[i] = byte(r.Uint32())
   547  	}
   548  }
   549  
   550  func benchmarkHash(b *testing.B, n int) {
   551  	s := strings.Repeat("A", n)
   552  
   553  	for i := 0; i < b.N; i++ {
   554  		StringHash(s, 0)
   555  	}
   556  	b.SetBytes(int64(n))
   557  }
   558  
   559  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   560  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   561  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   562  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   563  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }