github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/hash/maphash/maphash.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package maphash provides hash functions on byte sequences.
     6  // These hash functions are intended to be used to implement hash tables or
     7  // other data structures that need to map arbitrary strings or byte
     8  // sequences to a uniform distribution on unsigned 64-bit integers.
     9  // Each different instance of a hash table or data structure should use its own Seed.
    10  //
    11  // The hash functions are not cryptographically secure.
    12  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
    13  //
    14  package maphash
    15  
    16  import "unsafe"
    17  
// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	// s is the raw seed value. Zero is reserved to mean "uninitialized":
	// MakeSeed never returns it and setSeed panics when handed it.
	s uint64
}
    32  
// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself the first time a seed is
// needed: during the first call to Reset, Seed, or Sum64, or during the
// first write that has to flush the full internal buffer.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided. For example, the three sequences
//
//     h.Write([]byte{'f','o','o'})
//     h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
//     h.WriteString("foo")
//
// all have the same effect.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
//
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
// If multiple goroutines must compute the same seeded hash,
// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
	_     [0]func() // zero-size field of a non-comparable type: makes Hash not comparable
	seed  Seed      // initial seed used for this hash; zero until one is chosen or set
	state Seed      // current hash of all flushed bytes
	buf   [64]byte  // unflushed byte buffer; only buf[:n] is meaningful
	n     int       // number of unflushed bytes
}
    63  
    64  // initSeed seeds the hash if necessary.
    65  // initSeed is called lazily before any operation that actually uses h.seed/h.state.
    66  // Note that this does not include Write/WriteByte/WriteString in the case
    67  // where they only add to h.buf. (If they write too much, they call h.flush,
    68  // which does call h.initSeed.)
    69  func (h *Hash) initSeed() {
    70  	if h.seed.s == 0 {
    71  		h.setSeed(MakeSeed())
    72  	}
    73  }
    74  
    75  // WriteByte adds b to the sequence of bytes hashed by h.
    76  // It never fails; the error result is for implementing io.ByteWriter.
    77  func (h *Hash) WriteByte(b byte) error {
    78  	if h.n == len(h.buf) {
    79  		h.flush()
    80  	}
    81  	h.buf[h.n] = b
    82  	h.n++
    83  	return nil
    84  }
    85  
    86  // Write adds b to the sequence of bytes hashed by h.
    87  // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
    88  func (h *Hash) Write(b []byte) (int, error) {
    89  	size := len(b)
    90  	for h.n+len(b) > len(h.buf) {
    91  		k := copy(h.buf[h.n:], b)
    92  		h.n = len(h.buf)
    93  		b = b[k:]
    94  		h.flush()
    95  	}
    96  	h.n += copy(h.buf[h.n:], b)
    97  	return size, nil
    98  }
    99  
   100  // WriteString adds the bytes of s to the sequence of bytes hashed by h.
   101  // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
   102  func (h *Hash) WriteString(s string) (int, error) {
   103  	size := len(s)
   104  	for h.n+len(s) > len(h.buf) {
   105  		k := copy(h.buf[h.n:], s)
   106  		h.n = len(h.buf)
   107  		s = s[k:]
   108  		h.flush()
   109  	}
   110  	h.n += copy(h.buf[h.n:], s)
   111  	return size, nil
   112  }
   113  
   114  // Seed returns h's seed value.
   115  func (h *Hash) Seed() Seed {
   116  	h.initSeed()
   117  	return h.seed
   118  }
   119  
   120  // SetSeed sets h to use seed, which must have been returned by MakeSeed
   121  // or by another Hash's Seed method.
   122  // Two Hash objects with the same seed behave identically.
   123  // Two Hash objects with different seeds will very likely behave differently.
   124  // Any bytes added to h before this call will be discarded.
   125  func (h *Hash) SetSeed(seed Seed) {
   126  	h.setSeed(seed)
   127  	h.n = 0
   128  }
   129  
   130  // setSeed sets seed without discarding accumulated data.
   131  func (h *Hash) setSeed(seed Seed) {
   132  	if seed.s == 0 {
   133  		panic("maphash: use of uninitialized Seed")
   134  	}
   135  	h.seed = seed
   136  	h.state = seed
   137  }
   138  
   139  // Reset discards all bytes added to h.
   140  // (The seed remains the same.)
   141  func (h *Hash) Reset() {
   142  	h.initSeed()
   143  	h.state = h.seed
   144  	h.n = 0
   145  }
   146  
   147  // precondition: buffer is full.
   148  func (h *Hash) flush() {
   149  	if h.n != len(h.buf) {
   150  		panic("maphash: flush of partially full buffer")
   151  	}
   152  	h.initSeed()
   153  	h.state.s = rthash(h.buf[:], h.state.s)
   154  	h.n = 0
   155  }
   156  
   157  // Sum64 returns h's current 64-bit value, which depends on
   158  // h's seed and the sequence of bytes added to h since the
   159  // last call to Reset or SetSeed.
   160  //
   161  // All bits of the Sum64 result are close to uniformly and
   162  // independently distributed, so it can be safely reduced
   163  // by using bit masking, shifting, or modular arithmetic.
   164  func (h *Hash) Sum64() uint64 {
   165  	h.initSeed()
   166  	return rthash(h.buf[:h.n], h.state.s)
   167  }
   168  
   169  // MakeSeed returns a new random seed.
   170  func MakeSeed() Seed {
   171  	var s1, s2 uint64
   172  	for {
   173  		s1 = uint64(runtime_fastrand())
   174  		s2 = uint64(runtime_fastrand())
   175  		// We use seed 0 to indicate an uninitialized seed/hash,
   176  		// so keep trying until we get a non-zero seed.
   177  		if s1|s2 != 0 {
   178  			break
   179  		}
   180  	}
   181  	return Seed{s: s1<<32 + s2}
   182  }
   183  
// runtime_fastrand is a body-less declaration bound to runtime.fastrand
// by the go:linkname directive below.
// MakeSeed calls it twice per attempt and combines the two 32-bit results
// into one 64-bit seed.
//
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32
   186  
   187  func rthash(b []byte, seed uint64) uint64 {
   188  	if len(b) == 0 {
   189  		return seed
   190  	}
   191  	// The runtime hasher only works on uintptr. For 64-bit
   192  	// architectures, we use the hasher directly. Otherwise,
   193  	// we use two parallel hashers on the lower and upper 32 bits.
   194  	if unsafe.Sizeof(uintptr(0)) == 8 {
   195  		return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))))
   196  	}
   197  	lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))
   198  	hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b)))
   199  	return uint64(hi)<<32 | uint64(lo)
   200  }
   201  
// runtime_memhash is a body-less declaration bound to runtime.memhash
// by the go:linkname directive below.
// rthash calls it with p pointing at the first byte to hash, seed as the
// starting hash value, and s as the number of bytes to hash.
//
//go:linkname runtime_memhash runtime.memhash
//go:noescape
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
   205  
   206  // Sum appends the hash's current 64-bit value to b.
   207  // It exists for implementing hash.Hash.
   208  // For direct calls, it is more efficient to use Sum64.
   209  func (h *Hash) Sum(b []byte) []byte {
   210  	x := h.Sum64()
   211  	return append(b,
   212  		byte(x>>0),
   213  		byte(x>>8),
   214  		byte(x>>16),
   215  		byte(x>>24),
   216  		byte(x>>32),
   217  		byte(x>>40),
   218  		byte(x>>48),
   219  		byte(x>>56))
   220  }
   221  
   222  // Size returns h's hash value size, 8 bytes.
   223  func (h *Hash) Size() int { return 8 }
   224  
   225  // BlockSize returns h's block size.
   226  func (h *Hash) BlockSize() int { return len(h.buf) }