github.com/c12o16h1/go/src@v0.0.0-20200114212001-5a151c0f00ed/hash/maphash/maphash.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package maphash provides hash functions on byte sequences.
     6  // These hash functions are intended to be used to implement hash tables or
     7  // other data structures that need to map arbitrary strings or byte
     8  // sequences to a uniform distribution of integers.
     9  //
    10  // The hash functions are collision-resistant but not cryptographically secure.
    11  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
    12  package maphash
    13  
    14  import "unsafe"
    15  
// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	s uint64 // seed value; 0 is reserved to mean "uninitialized" (SetSeed panics on it)
}
    30  
// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided. For example, the three sequences
//
//     h.Write([]byte{'f','o','o'})
//     h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
//     h.WriteString("foo")
//
// all have the same effect.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
//
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
// If multiple goroutines must compute the same seeded hash,
// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
	_     [0]func() // not comparable
	seed  Seed      // initial seed used for this hash
	state Seed      // current hash of all flushed bytes
	buf   [64]byte  // unflushed byte buffer
	n     int       // number of unflushed bytes
}
    61  
    62  // initSeed seeds the hash if necessary.
    63  // initSeed is called lazily before any operation that actually uses h.seed/h.state.
    64  // Note that this does not include Write/WriteByte/WriteString in the case
    65  // where they only add to h.buf. (If they write too much, they call h.flush,
    66  // which does call h.initSeed.)
    67  func (h *Hash) initSeed() {
    68  	if h.seed.s == 0 {
    69  		h.SetSeed(MakeSeed())
    70  	}
    71  }
    72  
    73  // WriteByte adds b to the sequence of bytes hashed by h.
    74  // It never fails; the error result is for implementing io.ByteWriter.
    75  func (h *Hash) WriteByte(b byte) error {
    76  	if h.n == len(h.buf) {
    77  		h.flush()
    78  	}
    79  	h.buf[h.n] = b
    80  	h.n++
    81  	return nil
    82  }
    83  
    84  // Write adds b to the sequence of bytes hashed by h.
    85  // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
    86  func (h *Hash) Write(b []byte) (int, error) {
    87  	size := len(b)
    88  	for h.n+len(b) > len(h.buf) {
    89  		k := copy(h.buf[h.n:], b)
    90  		h.n = len(h.buf)
    91  		b = b[k:]
    92  		h.flush()
    93  	}
    94  	h.n += copy(h.buf[h.n:], b)
    95  	return size, nil
    96  }
    97  
    98  // WriteString adds the bytes of s to the sequence of bytes hashed by h.
    99  // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
   100  func (h *Hash) WriteString(s string) (int, error) {
   101  	size := len(s)
   102  	for h.n+len(s) > len(h.buf) {
   103  		k := copy(h.buf[h.n:], s)
   104  		h.n = len(h.buf)
   105  		s = s[k:]
   106  		h.flush()
   107  	}
   108  	h.n += copy(h.buf[h.n:], s)
   109  	return size, nil
   110  }
   111  
   112  // Seed returns h's seed value.
   113  func (h *Hash) Seed() Seed {
   114  	h.initSeed()
   115  	return h.seed
   116  }
   117  
   118  // SetSeed sets h to use seed, which must have been returned by MakeSeed
   119  // or by another Hash's Seed method.
   120  // Two Hash objects with the same seed behave identically.
   121  // Two Hash objects with different seeds will very likely behave differently.
   122  // Any bytes added to h before this call will be discarded.
   123  func (h *Hash) SetSeed(seed Seed) {
   124  	if seed.s == 0 {
   125  		panic("maphash: use of uninitialized Seed")
   126  	}
   127  	h.seed = seed
   128  	h.state = seed
   129  	h.n = 0
   130  }
   131  
   132  // Reset discards all bytes added to h.
   133  // (The seed remains the same.)
   134  func (h *Hash) Reset() {
   135  	h.initSeed()
   136  	h.state = h.seed
   137  	h.n = 0
   138  }
   139  
   140  // precondition: buffer is full.
   141  func (h *Hash) flush() {
   142  	if h.n != len(h.buf) {
   143  		panic("maphash: flush of partially full buffer")
   144  	}
   145  	h.initSeed()
   146  	h.state.s = rthash(h.buf[:], h.state.s)
   147  	h.n = 0
   148  }
   149  
   150  // Sum64 returns h's current 64-bit value, which depends on
   151  // h's seed and the sequence of bytes added to h since the
   152  // last call to Reset or SetSeed.
   153  //
   154  // All bits of the Sum64 result are close to uniformly and
   155  // independently distributed, so it can be safely reduced
   156  // by using bit masking, shifting, or modular arithmetic.
   157  func (h *Hash) Sum64() uint64 {
   158  	h.initSeed()
   159  	return rthash(h.buf[:h.n], h.state.s)
   160  }
   161  
   162  // MakeSeed returns a new random seed.
   163  func MakeSeed() Seed {
   164  	var s1, s2 uint64
   165  	for {
   166  		s1 = uint64(runtime_fastrand())
   167  		s2 = uint64(runtime_fastrand())
   168  		// We use seed 0 to indicate an uninitialized seed/hash,
   169  		// so keep trying until we get a non-zero seed.
   170  		if s1|s2 != 0 {
   171  			break
   172  		}
   173  	}
   174  	return Seed{s: s1<<32 + s2}
   175  }
   176  
// runtime_fastrand has no body in this package; the go:linkname directive
// below binds it to runtime.fastrand. MakeSeed uses it as its source of
// random 32-bit values.
// NOTE(review): presumably a fast, non-cryptographic generator — confirm
// against the runtime before relying on its statistical properties.
//
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32
   179  
   180  func rthash(b []byte, seed uint64) uint64 {
   181  	if len(b) == 0 {
   182  		return seed
   183  	}
   184  	// The runtime hasher only works on uintptr. For 64-bit
   185  	// architectures, we use the hasher directly. Otherwise,
   186  	// we use two parallel hashers on the lower and upper 32 bits.
   187  	if unsafe.Sizeof(uintptr(0)) == 8 {
   188  		return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))))
   189  	}
   190  	lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))
   191  	hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b)))
   192  	return uint64(hi)<<32 | uint64(lo)
   193  }
   194  
// runtime_memhash has no body in this package; the go:linkname directive
// below binds it to runtime.memhash. rthash calls it with a pointer to
// the first byte to hash (p), a seed, and the number of bytes (s).
// NOTE(review): the go:noescape directive asserts that p does not escape
// through this call — confirm this still holds for the runtime version in use.
//
//go:linkname runtime_memhash runtime.memhash
//go:noescape
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
   198  
   199  // Sum appends the hash's current 64-bit value to b.
   200  // It exists for implementing hash.Hash.
   201  // For direct calls, it is more efficient to use Sum64.
   202  func (h *Hash) Sum(b []byte) []byte {
   203  	x := h.Sum64()
   204  	return append(b,
   205  		byte(x>>0),
   206  		byte(x>>8),
   207  		byte(x>>16),
   208  		byte(x>>24),
   209  		byte(x>>32),
   210  		byte(x>>40),
   211  		byte(x>>48),
   212  		byte(x>>56))
   213  }
   214  
   215  // Size returns h's hash value size, 8 bytes.
   216  func (h *Hash) Size() int { return 8 }
   217  
   218  // BlockSize returns h's block size.
   219  func (h *Hash) BlockSize() int { return len(h.buf) }