github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/runtime/hash_test.go

github.com/hbdrawn/golang@v0.0.0-20141214014649-6b835209aba2/src/runtime/hash_test.go (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"math/rand"
    11  	. "runtime"
    12  	"strings"
    13  	"testing"
    14  )
    15  
    16  // Smhasher is a torture test for hash functions.
    17  // https://code.google.com/p/smhasher/
    18  // This code is a port of some of the Smhasher tests to Go.
    19  //
    20  // The current AES hash function passes Smhasher.  Our fallback
    21  // hash functions don't, so we only enable the difficult tests when
    22  // we know the AES implementation is available.
    23  
    24  // Sanity checks.
    25  // hash should not depend on values outside key.
    26  // hash should not depend on alignment.
    27  func TestSmhasherSanity(t *testing.T) {
    28  	r := rand.New(rand.NewSource(1234))
    29  	const REP = 10
    30  	const KEYMAX = 128
    31  	const PAD = 16
    32  	const OFFMAX = 16
    33  	for k := 0; k < REP; k++ {
    34  		for n := 0; n < KEYMAX; n++ {
    35  			for i := 0; i < OFFMAX; i++ {
    36  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    37  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    38  				randBytes(r, b[:])
    39  				randBytes(r, c[:])
    40  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    41  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    42  					t.Errorf("hash depends on bytes outside key")
    43  				}
    44  			}
    45  		}
    46  	}
    47  }
    48  
    49  type HashSet struct {
    50  	m map[uintptr]struct{} // set of hashes added
    51  	n int                  // number of hashes added
    52  }
    53  
    54  func newHashSet() *HashSet {
    55  	return &HashSet{make(map[uintptr]struct{}), 0}
    56  }
    57  func (s *HashSet) add(h uintptr) {
    58  	s.m[h] = struct{}{}
    59  	s.n++
    60  }
    61  func (s *HashSet) addS(x string) {
    62  	s.add(StringHash(x, 0))
    63  }
    64  func (s *HashSet) addB(x []byte) {
    65  	s.add(BytesHash(x, 0))
    66  }
    67  func (s *HashSet) addS_seed(x string, seed uintptr) {
    68  	s.add(StringHash(x, seed))
    69  }
    70  func (s *HashSet) check(t *testing.T) {
    71  	const SLOP = 10.0
    72  	collisions := s.n - len(s.m)
    73  	//fmt.Printf("%d/%d\n", len(s.m), s.n)
    74  	pairs := int64(s.n) * int64(s.n-1) / 2
    75  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
    76  	stddev := math.Sqrt(expected)
    77  	if float64(collisions) > expected+SLOP*3*stddev {
    78  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
    79  	}
    80  }
    81  
    82  // a string plus adding zeros must make distinct hashes
    83  func TestSmhasherAppendedZeros(t *testing.T) {
    84  	s := "hello" + strings.Repeat("\x00", 256)
    85  	h := newHashSet()
    86  	for i := 0; i <= len(s); i++ {
    87  		h.addS(s[:i])
    88  	}
    89  	h.check(t)
    90  }
    91  
    92  // All 0-3 byte strings have distinct hashes.
    93  func TestSmhasherSmallKeys(t *testing.T) {
    94  	h := newHashSet()
    95  	var b [3]byte
    96  	for i := 0; i < 256; i++ {
    97  		b[0] = byte(i)
    98  		h.addB(b[:1])
    99  		for j := 0; j < 256; j++ {
   100  			b[1] = byte(j)
   101  			h.addB(b[:2])
   102  			if !testing.Short() {
   103  				for k := 0; k < 256; k++ {
   104  					b[2] = byte(k)
   105  					h.addB(b[:3])
   106  				}
   107  			}
   108  		}
   109  	}
   110  	h.check(t)
   111  }
   112  
   113  // Different length strings of all zeros have distinct hashes.
   114  func TestSmhasherZeros(t *testing.T) {
   115  	N := 256 * 1024
   116  	if testing.Short() {
   117  		N = 1024
   118  	}
   119  	h := newHashSet()
   120  	b := make([]byte, N)
   121  	for i := 0; i <= N; i++ {
   122  		h.addB(b[:i])
   123  	}
   124  	h.check(t)
   125  }
   126  
   127  // Strings with up to two nonzero bytes all have distinct hashes.
   128  func TestSmhasherTwoNonzero(t *testing.T) {
   129  	if testing.Short() {
   130  		t.Skip("Skipping in short mode")
   131  	}
   132  	h := newHashSet()
   133  	for n := 2; n <= 16; n++ {
   134  		twoNonZero(h, n)
   135  	}
   136  	h.check(t)
   137  }
   138  func twoNonZero(h *HashSet, n int) {
   139  	b := make([]byte, n)
   140  
   141  	// all zero
   142  	h.addB(b[:])
   143  
   144  	// one non-zero byte
   145  	for i := 0; i < n; i++ {
   146  		for x := 1; x < 256; x++ {
   147  			b[i] = byte(x)
   148  			h.addB(b[:])
   149  			b[i] = 0
   150  		}
   151  	}
   152  
   153  	// two non-zero bytes
   154  	for i := 0; i < n; i++ {
   155  		for x := 1; x < 256; x++ {
   156  			b[i] = byte(x)
   157  			for j := i + 1; j < n; j++ {
   158  				for y := 1; y < 256; y++ {
   159  					b[j] = byte(y)
   160  					h.addB(b[:])
   161  					b[j] = 0
   162  				}
   163  			}
   164  			b[i] = 0
   165  		}
   166  	}
   167  }
   168  
   169  // Test strings with repeats, like "abcdabcdabcdabcd..."
   170  func TestSmhasherCyclic(t *testing.T) {
   171  	if testing.Short() {
   172  		t.Skip("Skipping in short mode")
   173  	}
   174  	if !HaveGoodHash() {
   175  		t.Skip("fallback hash not good enough for this test")
   176  	}
   177  	r := rand.New(rand.NewSource(1234))
   178  	const REPEAT = 8
   179  	const N = 1000000
   180  	for n := 4; n <= 12; n++ {
   181  		h := newHashSet()
   182  		b := make([]byte, REPEAT*n)
   183  		for i := 0; i < N; i++ {
   184  			b[0] = byte(i * 79 % 97)
   185  			b[1] = byte(i * 43 % 137)
   186  			b[2] = byte(i * 151 % 197)
   187  			b[3] = byte(i * 199 % 251)
   188  			randBytes(r, b[4:n])
   189  			for j := n; j < n*REPEAT; j++ {
   190  				b[j] = b[j-n]
   191  			}
   192  			h.addB(b)
   193  		}
   194  		h.check(t)
   195  	}
   196  }
   197  
   198  // Test strings with only a few bits set
   199  func TestSmhasherSparse(t *testing.T) {
   200  	if testing.Short() {
   201  		t.Skip("Skipping in short mode")
   202  	}
   203  	sparse(t, 32, 6)
   204  	sparse(t, 40, 6)
   205  	sparse(t, 48, 5)
   206  	sparse(t, 56, 5)
   207  	sparse(t, 64, 5)
   208  	sparse(t, 96, 4)
   209  	sparse(t, 256, 3)
   210  	sparse(t, 2048, 2)
   211  }
   212  func sparse(t *testing.T, n int, k int) {
   213  	b := make([]byte, n/8)
   214  	h := newHashSet()
   215  	setbits(h, b, 0, k)
   216  	h.check(t)
   217  }
   218  
   219  // set up to k bits at index i and greater
   220  func setbits(h *HashSet, b []byte, i int, k int) {
   221  	h.addB(b)
   222  	if k == 0 {
   223  		return
   224  	}
   225  	for j := i; j < len(b)*8; j++ {
   226  		b[j/8] |= byte(1 << uint(j&7))
   227  		setbits(h, b, j+1, k-1)
   228  		b[j/8] &= byte(^(1 << uint(j&7)))
   229  	}
   230  }
   231  
   232  // Test all possible combinations of n blocks from the set s.
   233  // "permutation" is a bad name here, but it is what Smhasher uses.
   234  func TestSmhasherPermutation(t *testing.T) {
   235  	if testing.Short() {
   236  		t.Skip("Skipping in short mode")
   237  	}
   238  	if !HaveGoodHash() {
   239  		t.Skip("fallback hash not good enough for this test")
   240  	}
   241  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   242  	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   243  	permutation(t, []uint32{0, 1}, 20)
   244  	permutation(t, []uint32{0, 1 << 31}, 20)
   245  	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   246  }
   247  func permutation(t *testing.T, s []uint32, n int) {
   248  	b := make([]byte, n*4)
   249  	h := newHashSet()
   250  	genPerm(h, b, s, 0)
   251  	h.check(t)
   252  }
   253  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   254  	h.addB(b[:n])
   255  	if n == len(b) {
   256  		return
   257  	}
   258  	for _, v := range s {
   259  		b[n] = byte(v)
   260  		b[n+1] = byte(v >> 8)
   261  		b[n+2] = byte(v >> 16)
   262  		b[n+3] = byte(v >> 24)
   263  		genPerm(h, b, s, n+4)
   264  	}
   265  }
   266  
   267  type Key interface {
   268  	clear()              // set bits all to 0
   269  	random(r *rand.Rand) // set key to something random
   270  	bits() int           // how many bits key has
   271  	flipBit(i int)       // flip bit i of the key
   272  	hash() uintptr       // hash the key
   273  	name() string        // for error reporting
   274  }
   275  
   276  type BytesKey struct {
   277  	b []byte
   278  }
   279  
   280  func (k *BytesKey) clear() {
   281  	for i := range k.b {
   282  		k.b[i] = 0
   283  	}
   284  }
   285  func (k *BytesKey) random(r *rand.Rand) {
   286  	randBytes(r, k.b)
   287  }
   288  func (k *BytesKey) bits() int {
   289  	return len(k.b) * 8
   290  }
   291  func (k *BytesKey) flipBit(i int) {
   292  	k.b[i>>3] ^= byte(1 << uint(i&7))
   293  }
   294  func (k *BytesKey) hash() uintptr {
   295  	return BytesHash(k.b, 0)
   296  }
   297  func (k *BytesKey) name() string {
   298  	return fmt.Sprintf("bytes%d", len(k.b))
   299  }
   300  
   301  type Int32Key struct {
   302  	i uint32
   303  }
   304  
   305  func (k *Int32Key) clear() {
   306  	k.i = 0
   307  }
   308  func (k *Int32Key) random(r *rand.Rand) {
   309  	k.i = r.Uint32()
   310  }
   311  func (k *Int32Key) bits() int {
   312  	return 32
   313  }
   314  func (k *Int32Key) flipBit(i int) {
   315  	k.i ^= 1 << uint(i)
   316  }
   317  func (k *Int32Key) hash() uintptr {
   318  	return Int32Hash(k.i, 0)
   319  }
   320  func (k *Int32Key) name() string {
   321  	return "int32"
   322  }
   323  
   324  type Int64Key struct {
   325  	i uint64
   326  }
   327  
   328  func (k *Int64Key) clear() {
   329  	k.i = 0
   330  }
   331  func (k *Int64Key) random(r *rand.Rand) {
   332  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   333  }
   334  func (k *Int64Key) bits() int {
   335  	return 64
   336  }
   337  func (k *Int64Key) flipBit(i int) {
   338  	k.i ^= 1 << uint(i)
   339  }
   340  func (k *Int64Key) hash() uintptr {
   341  	return Int64Hash(k.i, 0)
   342  }
   343  func (k *Int64Key) name() string {
   344  	return "int64"
   345  }
   346  
   347  type EfaceKey struct {
   348  	i interface{}
   349  }
   350  
   351  func (k *EfaceKey) clear() {
   352  	k.i = nil
   353  }
   354  func (k *EfaceKey) random(r *rand.Rand) {
   355  	k.i = uint64(r.Int63())
   356  }
   357  func (k *EfaceKey) bits() int {
   358  	// use 64 bits.  This tests inlined interfaces
   359  	// on 64-bit targets and indirect interfaces on
   360  	// 32-bit targets.
   361  	return 64
   362  }
   363  func (k *EfaceKey) flipBit(i int) {
   364  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
   365  }
   366  func (k *EfaceKey) hash() uintptr {
   367  	return EfaceHash(k.i, 0)
   368  }
   369  func (k *EfaceKey) name() string {
   370  	return "Eface"
   371  }
   372  
   373  type IfaceKey struct {
   374  	i interface {
   375  		F()
   376  	}
   377  }
   378  type fInter uint64
   379  
   380  func (x fInter) F() {
   381  }
   382  
   383  func (k *IfaceKey) clear() {
   384  	k.i = nil
   385  }
   386  func (k *IfaceKey) random(r *rand.Rand) {
   387  	k.i = fInter(r.Int63())
   388  }
   389  func (k *IfaceKey) bits() int {
   390  	// use 64 bits.  This tests inlined interfaces
   391  	// on 64-bit targets and indirect interfaces on
   392  	// 32-bit targets.
   393  	return 64
   394  }
   395  func (k *IfaceKey) flipBit(i int) {
   396  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
   397  }
   398  func (k *IfaceKey) hash() uintptr {
   399  	return IfaceHash(k.i, 0)
   400  }
   401  func (k *IfaceKey) name() string {
   402  	return "Iface"
   403  }
   404  
   405  // Flipping a single bit of a key should flip each output bit with 50% probability.
   406  func TestSmhasherAvalanche(t *testing.T) {
   407  	if !HaveGoodHash() {
   408  		t.Skip("fallback hash not good enough for this test")
   409  	}
   410  	if testing.Short() {
   411  		t.Skip("Skipping in short mode")
   412  	}
   413  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   414  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   415  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   416  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   417  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   418  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   419  	avalancheTest1(t, &Int32Key{})
   420  	avalancheTest1(t, &Int64Key{})
   421  	avalancheTest1(t, &EfaceKey{})
   422  	avalancheTest1(t, &IfaceKey{})
   423  }
   424  func avalancheTest1(t *testing.T, k Key) {
   425  	const REP = 100000
   426  	r := rand.New(rand.NewSource(1234))
   427  	n := k.bits()
   428  
   429  	// grid[i][j] is a count of whether flipping
   430  	// input bit i affects output bit j.
   431  	grid := make([][hashSize]int, n)
   432  
   433  	for z := 0; z < REP; z++ {
   434  		// pick a random key, hash it
   435  		k.random(r)
   436  		h := k.hash()
   437  
   438  		// flip each bit, hash & compare the results
   439  		for i := 0; i < n; i++ {
   440  			k.flipBit(i)
   441  			d := h ^ k.hash()
   442  			k.flipBit(i)
   443  
   444  			// record the effects of that bit flip
   445  			g := &grid[i]
   446  			for j := 0; j < hashSize; j++ {
   447  				g[j] += int(d & 1)
   448  				d >>= 1
   449  			}
   450  		}
   451  	}
   452  
   453  	// Each entry in the grid should be about REP/2.
   454  	// More precisely, we did N = k.bits() * hashSize experiments where
   455  	// each is the sum of REP coin flips.  We want to find bounds on the
   456  	// sum of coin flips such that a truly random experiment would have
   457  	// all sums inside those bounds with 99% probability.
   458  	N := n * hashSize
   459  	var c float64
   460  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   461  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   462  	}
   463  	c *= 4.0 // allowed slack - we don't need to be perfectly random
   464  	mean := .5 * REP
   465  	stddev := .5 * math.Sqrt(REP)
   466  	low := int(mean - c*stddev)
   467  	high := int(mean + c*stddev)
   468  	for i := 0; i < n; i++ {
   469  		for j := 0; j < hashSize; j++ {
   470  			x := grid[i][j]
   471  			if x < low || x > high {
   472  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   473  			}
   474  		}
   475  	}
   476  }
   477  
   478  // All bit rotations of a set of distinct keys
   479  func TestSmhasherWindowed(t *testing.T) {
   480  	windowed(t, &Int32Key{})
   481  	windowed(t, &Int64Key{})
   482  	windowed(t, &BytesKey{make([]byte, 128)})
   483  }
   484  func windowed(t *testing.T, k Key) {
   485  	if testing.Short() {
   486  		t.Skip("Skipping in short mode")
   487  	}
   488  	const BITS = 16
   489  
   490  	for r := 0; r < k.bits(); r++ {
   491  		h := newHashSet()
   492  		for i := 0; i < 1<<BITS; i++ {
   493  			k.clear()
   494  			for j := 0; j < BITS; j++ {
   495  				if i>>uint(j)&1 != 0 {
   496  					k.flipBit((j + r) % k.bits())
   497  				}
   498  			}
   499  			h.add(k.hash())
   500  		}
   501  		h.check(t)
   502  	}
   503  }
   504  
   505  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   506  func TestSmhasherText(t *testing.T) {
   507  	if testing.Short() {
   508  		t.Skip("Skipping in short mode")
   509  	}
   510  	text(t, "Foo", "Bar")
   511  	text(t, "FooBar", "")
   512  	text(t, "", "FooBar")
   513  }
   514  func text(t *testing.T, prefix, suffix string) {
   515  	const N = 4
   516  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   517  	const L = len(S)
   518  	b := make([]byte, len(prefix)+N+len(suffix))
   519  	copy(b, prefix)
   520  	copy(b[len(prefix)+N:], suffix)
   521  	h := newHashSet()
   522  	c := b[len(prefix):]
   523  	for i := 0; i < L; i++ {
   524  		c[0] = S[i]
   525  		for j := 0; j < L; j++ {
   526  			c[1] = S[j]
   527  			for k := 0; k < L; k++ {
   528  				c[2] = S[k]
   529  				for x := 0; x < L; x++ {
   530  					c[3] = S[x]
   531  					h.addB(b)
   532  				}
   533  			}
   534  		}
   535  	}
   536  	h.check(t)
   537  }
   538  
   539  // Make sure different seed values generate different hashes.
   540  func TestSmhasherSeed(t *testing.T) {
   541  	h := newHashSet()
   542  	const N = 100000
   543  	s := "hello"
   544  	for i := 0; i < N; i++ {
   545  		h.addS_seed(s, uintptr(i))
   546  	}
   547  	h.check(t)
   548  }
   549  
   550  // size of the hash output (32 or 64 bits)
   551  const hashSize = 32 + int(^uintptr(0)>>63<<5)
   552  
   553  func randBytes(r *rand.Rand, b []byte) {
   554  	for i := range b {
   555  		b[i] = byte(r.Uint32())
   556  	}
   557  }
   558  
   559  func benchmarkHash(b *testing.B, n int) {
   560  	s := strings.Repeat("A", n)
   561  
   562  	for i := 0; i < b.N; i++ {
   563  		StringHash(s, 0)
   564  	}
   565  	b.SetBytes(int64(n))
   566  }
   567  
   568  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   569  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   570  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   571  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   572  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }