// github.com/geraldss/go/src@v0.0.0-20210511222824-ac7d0ebfc235/hash/maphash/maphash.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package maphash provides hash functions on byte sequences.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
// Each different instance of a hash table or data structure should use its own Seed.
//
// The hash functions are not cryptographically secure.
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash

import "unsafe"

// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	s uint64 // seed value; zero means the Seed is uninitialized
}

// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
//
// The computed hash values depend only on the initial seed and
// the sequence of bytes provided to the Hash object, not on the way
// in which the bytes are provided.
For example, the three sequences 43 // 44 // h.Write([]byte{'f','o','o'}) 45 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 46 // h.WriteString("foo") 47 // 48 // all have the same effect. 49 // 50 // Hashes are intended to be collision-resistant, even for situations 51 // where an adversary controls the byte sequences being hashed. 52 // 53 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 54 // If multiple goroutines must compute the same seeded hash, 55 // each can declare its own Hash and call SetSeed with a common Seed. 56 type Hash struct { 57 _ [0]func() // not comparable 58 seed Seed // initial seed used for this hash 59 state Seed // current hash of all flushed bytes 60 buf [64]byte // unflushed byte buffer 61 n int // number of unflushed bytes 62 } 63 64 // initSeed seeds the hash if necessary. 65 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 66 // Note that this does not include Write/WriteByte/WriteString in the case 67 // where they only add to h.buf. (If they write too much, they call h.flush, 68 // which does call h.initSeed.) 69 func (h *Hash) initSeed() { 70 if h.seed.s == 0 { 71 h.setSeed(MakeSeed()) 72 } 73 } 74 75 // WriteByte adds b to the sequence of bytes hashed by h. 76 // It never fails; the error result is for implementing io.ByteWriter. 77 func (h *Hash) WriteByte(b byte) error { 78 if h.n == len(h.buf) { 79 h.flush() 80 } 81 h.buf[h.n] = b 82 h.n++ 83 return nil 84 } 85 86 // Write adds b to the sequence of bytes hashed by h. 87 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 88 func (h *Hash) Write(b []byte) (int, error) { 89 size := len(b) 90 for h.n+len(b) > len(h.buf) { 91 k := copy(h.buf[h.n:], b) 92 h.n = len(h.buf) 93 b = b[k:] 94 h.flush() 95 } 96 h.n += copy(h.buf[h.n:], b) 97 return size, nil 98 } 99 100 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 
101 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 102 func (h *Hash) WriteString(s string) (int, error) { 103 size := len(s) 104 for h.n+len(s) > len(h.buf) { 105 k := copy(h.buf[h.n:], s) 106 h.n = len(h.buf) 107 s = s[k:] 108 h.flush() 109 } 110 h.n += copy(h.buf[h.n:], s) 111 return size, nil 112 } 113 114 // Seed returns h's seed value. 115 func (h *Hash) Seed() Seed { 116 h.initSeed() 117 return h.seed 118 } 119 120 // SetSeed sets h to use seed, which must have been returned by MakeSeed 121 // or by another Hash's Seed method. 122 // Two Hash objects with the same seed behave identically. 123 // Two Hash objects with different seeds will very likely behave differently. 124 // Any bytes added to h before this call will be discarded. 125 func (h *Hash) SetSeed(seed Seed) { 126 h.setSeed(seed) 127 h.n = 0 128 } 129 130 // setSeed sets seed without discarding accumulated data. 131 func (h *Hash) setSeed(seed Seed) { 132 if seed.s == 0 { 133 panic("maphash: use of uninitialized Seed") 134 } 135 h.seed = seed 136 h.state = seed 137 } 138 139 // Reset discards all bytes added to h. 140 // (The seed remains the same.) 141 func (h *Hash) Reset() { 142 h.initSeed() 143 h.state = h.seed 144 h.n = 0 145 } 146 147 // precondition: buffer is full. 148 func (h *Hash) flush() { 149 if h.n != len(h.buf) { 150 panic("maphash: flush of partially full buffer") 151 } 152 h.initSeed() 153 h.state.s = rthash(h.buf[:], h.state.s) 154 h.n = 0 155 } 156 157 // Sum64 returns h's current 64-bit value, which depends on 158 // h's seed and the sequence of bytes added to h since the 159 // last call to Reset or SetSeed. 160 // 161 // All bits of the Sum64 result are close to uniformly and 162 // independently distributed, so it can be safely reduced 163 // by using bit masking, shifting, or modular arithmetic. 
164 func (h *Hash) Sum64() uint64 { 165 h.initSeed() 166 return rthash(h.buf[:h.n], h.state.s) 167 } 168 169 // MakeSeed returns a new random seed. 170 func MakeSeed() Seed { 171 var s1, s2 uint64 172 for { 173 s1 = uint64(runtime_fastrand()) 174 s2 = uint64(runtime_fastrand()) 175 // We use seed 0 to indicate an uninitialized seed/hash, 176 // so keep trying until we get a non-zero seed. 177 if s1|s2 != 0 { 178 break 179 } 180 } 181 return Seed{s: s1<<32 + s2} 182 } 183 184 //go:linkname runtime_fastrand runtime.fastrand 185 func runtime_fastrand() uint32 186 187 func rthash(b []byte, seed uint64) uint64 { 188 if len(b) == 0 { 189 return seed 190 } 191 // The runtime hasher only works on uintptr. For 64-bit 192 // architectures, we use the hasher directly. Otherwise, 193 // we use two parallel hashers on the lower and upper 32 bits. 194 if unsafe.Sizeof(uintptr(0)) == 8 { 195 return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))) 196 } 197 lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))) 198 hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b))) 199 return uint64(hi)<<32 | uint64(lo) 200 } 201 202 //go:linkname runtime_memhash runtime.memhash 203 //go:noescape 204 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 205 206 // Sum appends the hash's current 64-bit value to b. 207 // It exists for implementing hash.Hash. 208 // For direct calls, it is more efficient to use Sum64. 209 func (h *Hash) Sum(b []byte) []byte { 210 x := h.Sum64() 211 return append(b, 212 byte(x>>0), 213 byte(x>>8), 214 byte(x>>16), 215 byte(x>>24), 216 byte(x>>32), 217 byte(x>>40), 218 byte(x>>48), 219 byte(x>>56)) 220 } 221 222 // Size returns h's hash value size, 8 bytes. 223 func (h *Hash) Size() int { return 8 } 224 225 // BlockSize returns h's block size. 226 func (h *Hash) BlockSize() int { return len(h.buf) }