github.com/c12o16h1/go/src@v0.0.0-20200114212001-5a151c0f00ed/hash/maphash/maphash.go (about) 1 // Copyright 2019 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package maphash provides hash functions on byte sequences. 6 // These hash functions are intended to be used to implement hash tables or 7 // other data structures that need to map arbitrary strings or byte 8 // sequences to a uniform distribution of integers. 9 // 10 // The hash functions are collision-resistant but not cryptographically secure. 11 // (See crypto/sha256 and crypto/sha512 for cryptographic use.) 12 package maphash 13 14 import "unsafe" 15 16 // A Seed is a random value that selects the specific hash function 17 // computed by a Hash. If two Hashes use the same Seeds, they 18 // will compute the same hash values for any given input. 19 // If two Hashes use different Seeds, they are very likely to compute 20 // distinct hash values for any given input. 21 // 22 // A Seed must be initialized by calling MakeSeed. 23 // The zero seed is uninitialized and not valid for use with Hash's SetSeed method. 24 // 25 // Each Seed value is local to a single process and cannot be serialized 26 // or otherwise recreated in a different process. 27 type Seed struct { 28 s uint64 29 } 30 31 // A Hash computes a seeded hash of a byte sequence. 32 // 33 // The zero Hash is a valid Hash ready to use. 34 // A zero Hash chooses a random seed for itself during 35 // the first call to a Reset, Write, Seed, Sum64, or Seed method. 36 // For control over the seed, use SetSeed. 37 // 38 // The computed hash values depend only on the initial seed and 39 // the sequence of bytes provided to the Hash object, not on the way 40 // in which the bytes are provided. For example, the three sequences 41 // 42 // h.Write([]byte{'f','o','o'}) 43 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 44 // h.WriteString("foo") 45 // 46 // all have the same effect. 47 // 48 // Hashes are intended to be collision-resistant, even for situations 49 // where an adversary controls the byte sequences being hashed. 50 // 51 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 52 // If multiple goroutines must compute the same seeded hash, 53 // each can declare its own Hash and call SetSeed with a common Seed. 54 type Hash struct { 55 _ [0]func() // not comparable 56 seed Seed // initial seed used for this hash 57 state Seed // current hash of all flushed bytes 58 buf [64]byte // unflushed byte buffer 59 n int // number of unflushed bytes 60 } 61 62 // initSeed seeds the hash if necessary. 63 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 64 // Note that this does not include Write/WriteByte/WriteString in the case 65 // where they only add to h.buf. (If they write too much, they call h.flush, 66 // which does call h.initSeed.) 67 func (h *Hash) initSeed() { 68 if h.seed.s == 0 { 69 h.SetSeed(MakeSeed()) 70 } 71 } 72 73 // WriteByte adds b to the sequence of bytes hashed by h. 74 // It never fails; the error result is for implementing io.ByteWriter. 75 func (h *Hash) WriteByte(b byte) error { 76 if h.n == len(h.buf) { 77 h.flush() 78 } 79 h.buf[h.n] = b 80 h.n++ 81 return nil 82 } 83 84 // Write adds b to the sequence of bytes hashed by h. 85 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 86 func (h *Hash) Write(b []byte) (int, error) { 87 size := len(b) 88 for h.n+len(b) > len(h.buf) { 89 k := copy(h.buf[h.n:], b) 90 h.n = len(h.buf) 91 b = b[k:] 92 h.flush() 93 } 94 h.n += copy(h.buf[h.n:], b) 95 return size, nil 96 } 97 98 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 99 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 100 func (h *Hash) WriteString(s string) (int, error) { 101 size := len(s) 102 for h.n+len(s) > len(h.buf) { 103 k := copy(h.buf[h.n:], s) 104 h.n = len(h.buf) 105 s = s[k:] 106 h.flush() 107 } 108 h.n += copy(h.buf[h.n:], s) 109 return size, nil 110 } 111 112 // Seed returns h's seed value. 113 func (h *Hash) Seed() Seed { 114 h.initSeed() 115 return h.seed 116 } 117 118 // SetSeed sets h to use seed, which must have been returned by MakeSeed 119 // or by another Hash's Seed method. 120 // Two Hash objects with the same seed behave identically. 121 // Two Hash objects with different seeds will very likely behave differently. 122 // Any bytes added to h before this call will be discarded. 123 func (h *Hash) SetSeed(seed Seed) { 124 if seed.s == 0 { 125 panic("maphash: use of uninitialized Seed") 126 } 127 h.seed = seed 128 h.state = seed 129 h.n = 0 130 } 131 132 // Reset discards all bytes added to h. 133 // (The seed remains the same.) 134 func (h *Hash) Reset() { 135 h.initSeed() 136 h.state = h.seed 137 h.n = 0 138 } 139 140 // precondition: buffer is full. 141 func (h *Hash) flush() { 142 if h.n != len(h.buf) { 143 panic("maphash: flush of partially full buffer") 144 } 145 h.initSeed() 146 h.state.s = rthash(h.buf[:], h.state.s) 147 h.n = 0 148 } 149 150 // Sum64 returns h's current 64-bit value, which depends on 151 // h's seed and the sequence of bytes added to h since the 152 // last call to Reset or SetSeed. 153 // 154 // All bits of the Sum64 result are close to uniformly and 155 // independently distributed, so it can be safely reduced 156 // by using bit masking, shifting, or modular arithmetic. 157 func (h *Hash) Sum64() uint64 { 158 h.initSeed() 159 return rthash(h.buf[:h.n], h.state.s) 160 } 161 162 // MakeSeed returns a new random seed. 163 func MakeSeed() Seed { 164 var s1, s2 uint64 165 for { 166 s1 = uint64(runtime_fastrand()) 167 s2 = uint64(runtime_fastrand()) 168 // We use seed 0 to indicate an uninitialized seed/hash, 169 // so keep trying until we get a non-zero seed. 170 if s1|s2 != 0 { 171 break 172 } 173 } 174 return Seed{s: s1<<32 + s2} 175 } 176 177 //go:linkname runtime_fastrand runtime.fastrand 178 func runtime_fastrand() uint32 179 180 func rthash(b []byte, seed uint64) uint64 { 181 if len(b) == 0 { 182 return seed 183 } 184 // The runtime hasher only works on uintptr. For 64-bit 185 // architectures, we use the hasher directly. Otherwise, 186 // we use two parallel hashers on the lower and upper 32 bits. 187 if unsafe.Sizeof(uintptr(0)) == 8 { 188 return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))) 189 } 190 lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))) 191 hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b))) 192 return uint64(hi)<<32 | uint64(lo) 193 } 194 195 //go:linkname runtime_memhash runtime.memhash 196 //go:noescape 197 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 198 199 // Sum appends the hash's current 64-bit value to b. 200 // It exists for implementing hash.Hash. 201 // For direct calls, it is more efficient to use Sum64. 202 func (h *Hash) Sum(b []byte) []byte { 203 x := h.Sum64() 204 return append(b, 205 byte(x>>0), 206 byte(x>>8), 207 byte(x>>16), 208 byte(x>>24), 209 byte(x>>32), 210 byte(x>>40), 211 byte(x>>48), 212 byte(x>>56)) 213 } 214 215 // Size returns h's hash value size, 8 bytes. 216 func (h *Hash) Size() int { return 8 } 217 218 // BlockSize returns h's block size. 219 func (h *Hash) BlockSize() int { return len(h.buf) }