github.com/go-board/x-go@v0.1.2-0.20220610024734-db1323f6cb15/xhash/internal/maphash/hash.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package maphash provides hash functions on byte sequences.
     6  // These hash functions are intended to be used to implement hash tables or
     7  // other data structures that need to map arbitrary strings or byte
     8  // sequences to a uniform distribution on unsigned 64-bit integers.
     9  //
    10  // The hash functions are collision-resistant but not cryptographically secure.
    11  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
    12  //
    13  // The hash value of a given byte sequence is consistent within a
    14  // single process, but will be different in different processes.
    15  package maphash
    16  
    17  import "unsafe"
    18  
// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	// s is the raw 64-bit seed. The value 0 is reserved to mean
	// "uninitialized": MakeSeed never returns it and setSeed panics on it.
	s uint64
}
    33  
// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided. For example, the three sequences
//
//	h.Write([]byte{'f','o','o'})
//	h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
//	h.WriteString("foo")
//
// all have the same effect.
//
// Hashes are intended to be collision-resistant, even for situations
// where an adversary controls the byte sequences being hashed.
//
// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
// If multiple goroutines must compute the same seeded hash,
// each can declare its own Hash and call SetSeed with a common Seed.
type Hash struct {
	// The zero-length array of funcs occupies no space; because func
	// values cannot be compared, it makes Hash values unusable with ==.
	_     [0]func() // not comparable
	seed  Seed      // initial seed used for this hash
	state Seed      // current hash of all flushed bytes
	buf   [64]byte  // unflushed byte buffer
	n     int       // number of unflushed bytes
}
    64  
    65  // initSeed seeds the hash if necessary.
    66  // initSeed is called lazily before any operation that actually uses h.seed/h.state.
    67  // Note that this does not include Write/WriteByte/WriteString in the case
    68  // where they only add to h.buf. (If they write too much, they call h.flush,
    69  // which does call h.initSeed.)
    70  func (h *Hash) initSeed() {
    71  	if h.seed.s == 0 {
    72  		h.setSeed(MakeSeed())
    73  	}
    74  }
    75  
    76  // WriteByte adds b to the sequence of bytes hashed by h.
    77  // It never fails; the error result is for implementing io.ByteWriter.
    78  func (h *Hash) WriteByte(b byte) error {
    79  	if h.n == len(h.buf) {
    80  		h.flush()
    81  	}
    82  	h.buf[h.n] = b
    83  	h.n++
    84  	return nil
    85  }
    86  
    87  // Write adds b to the sequence of bytes hashed by h.
    88  // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
    89  func (h *Hash) Write(b []byte) (int, error) {
    90  	size := len(b)
    91  	for h.n+len(b) > len(h.buf) {
    92  		k := copy(h.buf[h.n:], b)
    93  		h.n = len(h.buf)
    94  		b = b[k:]
    95  		h.flush()
    96  	}
    97  	h.n += copy(h.buf[h.n:], b)
    98  	return size, nil
    99  }
   100  
   101  // WriteString adds the bytes of s to the sequence of bytes hashed by h.
   102  // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
   103  func (h *Hash) WriteString(s string) (int, error) {
   104  	size := len(s)
   105  	for h.n+len(s) > len(h.buf) {
   106  		k := copy(h.buf[h.n:], s)
   107  		h.n = len(h.buf)
   108  		s = s[k:]
   109  		h.flush()
   110  	}
   111  	h.n += copy(h.buf[h.n:], s)
   112  	return size, nil
   113  }
   114  
   115  // Seed returns h's seed value.
   116  func (h *Hash) Seed() Seed {
   117  	h.initSeed()
   118  	return h.seed
   119  }
   120  
   121  // SetSeed sets h to use seed, which must have been returned by MakeSeed
   122  // or by another Hash's Seed method.
   123  // Two Hash objects with the same seed behave identically.
   124  // Two Hash objects with different seeds will very likely behave differently.
   125  // Any bytes added to h before this call will be discarded.
   126  func (h *Hash) SetSeed(seed Seed) {
   127  	h.setSeed(seed)
   128  	h.n = 0
   129  }
   130  
   131  // setSeed sets seed without discarding accumulated data.
   132  func (h *Hash) setSeed(seed Seed) {
   133  	if seed.s == 0 {
   134  		panic("maphash: use of uninitialized Seed")
   135  	}
   136  	h.seed = seed
   137  	h.state = seed
   138  }
   139  
   140  // Reset discards all bytes added to h.
   141  // (The seed remains the same.)
   142  func (h *Hash) Reset() {
   143  	h.initSeed()
   144  	h.state = h.seed
   145  	h.n = 0
   146  }
   147  
   148  // precondition: buffer is full.
   149  func (h *Hash) flush() {
   150  	if h.n != len(h.buf) {
   151  		panic("maphash: flush of partially full buffer")
   152  	}
   153  	h.initSeed()
   154  	h.state.s = rthash(h.buf[:], h.state.s)
   155  	h.n = 0
   156  }
   157  
   158  // Sum64 returns h's current 64-bit value, which depends on
   159  // h's seed and the sequence of bytes added to h since the
   160  // last call to Reset or SetSeed.
   161  //
   162  // All bits of the Sum64 result are close to uniformly and
   163  // independently distributed, so it can be safely reduced
   164  // by using bit masking, shifting, or modular arithmetic.
   165  func (h *Hash) Sum64() uint64 {
   166  	h.initSeed()
   167  	return rthash(h.buf[:h.n], h.state.s)
   168  }
   169  
   170  // MakeSeed returns a new random seed.
   171  func MakeSeed() Seed {
   172  	var s1, s2 uint64
   173  	for {
   174  		s1 = uint64(runtime_fastrand())
   175  		s2 = uint64(runtime_fastrand())
   176  		// We use seed 0 to indicate an uninitialized seed/hash,
   177  		// so keep trying until we get a non-zero seed.
   178  		if s1|s2 != 0 {
   179  			break
   180  		}
   181  	}
   182  	return Seed{s: s1<<32 + s2}
   183  }
   184  
// runtime_fastrand is a body-less declaration that the go:linkname directive
// below binds to runtime.fastrand. MakeSeed calls it twice per attempt to
// obtain the two 32-bit halves of a seed.
//
//go:linkname runtime_fastrand runtime.fastrand
func runtime_fastrand() uint32
   187  
   188  func rthash(b []byte, seed uint64) uint64 {
   189  	if len(b) == 0 {
   190  		return seed
   191  	}
   192  	// The runtime hasher only works on uintptr. For 64-bit
   193  	// architectures, we use the hasher directly. Otherwise,
   194  	// we use two parallel hashers on the lower and upper 32 bits.
   195  	if unsafe.Sizeof(uintptr(0)) == 8 {
   196  		return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))))
   197  	}
   198  	lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))
   199  	hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b)))
   200  	return uint64(hi)<<32 | uint64(lo)
   201  }
   202  
// runtime_memhash is a body-less declaration that the go:linkname directive
// below binds to runtime.memhash. As called from rthash, p points at the
// first byte to hash, seed is the starting hash value, and s is the number
// of bytes to hash.
//
//go:linkname runtime_memhash runtime.memhash
//go:noescape
func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
   206  
   207  // Sum appends the hash's current 64-bit value to b.
   208  // It exists for implementing hash.Hash.
   209  // For direct calls, it is more efficient to use Sum64.
   210  func (h *Hash) Sum(b []byte) []byte {
   211  	x := h.Sum64()
   212  	return append(b,
   213  		byte(x>>0),
   214  		byte(x>>8),
   215  		byte(x>>16),
   216  		byte(x>>24),
   217  		byte(x>>32),
   218  		byte(x>>40),
   219  		byte(x>>48),
   220  		byte(x>>56))
   221  }
   222  
   223  // Size returns h's hash value size, 8 bytes.
   224  func (h *Hash) Size() int { return 8 }
   225  
   226  // BlockSize returns h's block size.
   227  func (h *Hash) BlockSize() int { return len(h.buf) }