// github.com/go-board/x-go@v0.1.2-0.20220610024734-db1323f6cb15/xhash/internal/maphash/hash.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package maphash provides hash functions on byte sequences.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
//
// The hash functions are collision-resistant but not cryptographically secure.
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
//
// The hash value of a given byte sequence is consistent within a
// single process, but will be different in different processes.
package maphash

import "unsafe"

// A Seed is a random value that selects the specific hash function
// computed by a Hash. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
// If two Hashes use different Seeds, they are very likely to compute
// distinct hash values for any given input.
//
// A Seed must be initialized by calling MakeSeed.
// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
//
// Each Seed value is local to a single process and cannot be serialized
// or otherwise recreated in a different process.
type Seed struct {
	s uint64 // raw seed value; 0 marks an uninitialized Seed
}

// A Hash computes a seeded hash of a byte sequence.
//
// The zero Hash is a valid Hash ready to use.
// A zero Hash chooses a random seed for itself during
// the first call to a Reset, Write, Seed, or Sum64 method.
// For control over the seed, use SetSeed.
40 // 41 // The computed hash values depend only on the initial seed and 42 // the sequence of bytes provided to the Hash object, not on the way 43 // in which the bytes are provided. For example, the three sequences 44 // 45 // h.Write([]byte{'f','o','o'}) 46 // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') 47 // h.WriteString("foo") 48 // 49 // all have the same effect. 50 // 51 // Hashes are intended to be collision-resistant, even for situations 52 // where an adversary controls the byte sequences being hashed. 53 // 54 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. 55 // If multiple goroutines must compute the same seeded hash, 56 // each can declare its own Hash and call SetSeed with a common Seed. 57 type Hash struct { 58 _ [0]func() // not comparable 59 seed Seed // initial seed used for this hash 60 state Seed // current hash of all flushed bytes 61 buf [64]byte // unflushed byte buffer 62 n int // number of unflushed bytes 63 } 64 65 // initSeed seeds the hash if necessary. 66 // initSeed is called lazily before any operation that actually uses h.seed/h.state. 67 // Note that this does not include Write/WriteByte/WriteString in the case 68 // where they only add to h.buf. (If they write too much, they call h.flush, 69 // which does call h.initSeed.) 70 func (h *Hash) initSeed() { 71 if h.seed.s == 0 { 72 h.setSeed(MakeSeed()) 73 } 74 } 75 76 // WriteByte adds b to the sequence of bytes hashed by h. 77 // It never fails; the error result is for implementing io.ByteWriter. 78 func (h *Hash) WriteByte(b byte) error { 79 if h.n == len(h.buf) { 80 h.flush() 81 } 82 h.buf[h.n] = b 83 h.n++ 84 return nil 85 } 86 87 // Write adds b to the sequence of bytes hashed by h. 88 // It always writes all of b and never fails; the count and error result are for implementing io.Writer. 
89 func (h *Hash) Write(b []byte) (int, error) { 90 size := len(b) 91 for h.n+len(b) > len(h.buf) { 92 k := copy(h.buf[h.n:], b) 93 h.n = len(h.buf) 94 b = b[k:] 95 h.flush() 96 } 97 h.n += copy(h.buf[h.n:], b) 98 return size, nil 99 } 100 101 // WriteString adds the bytes of s to the sequence of bytes hashed by h. 102 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. 103 func (h *Hash) WriteString(s string) (int, error) { 104 size := len(s) 105 for h.n+len(s) > len(h.buf) { 106 k := copy(h.buf[h.n:], s) 107 h.n = len(h.buf) 108 s = s[k:] 109 h.flush() 110 } 111 h.n += copy(h.buf[h.n:], s) 112 return size, nil 113 } 114 115 // Seed returns h's seed value. 116 func (h *Hash) Seed() Seed { 117 h.initSeed() 118 return h.seed 119 } 120 121 // SetSeed sets h to use seed, which must have been returned by MakeSeed 122 // or by another Hash's Seed method. 123 // Two Hash objects with the same seed behave identically. 124 // Two Hash objects with different seeds will very likely behave differently. 125 // Any bytes added to h before this call will be discarded. 126 func (h *Hash) SetSeed(seed Seed) { 127 h.setSeed(seed) 128 h.n = 0 129 } 130 131 // setSeed sets seed without discarding accumulated data. 132 func (h *Hash) setSeed(seed Seed) { 133 if seed.s == 0 { 134 panic("maphash: use of uninitialized Seed") 135 } 136 h.seed = seed 137 h.state = seed 138 } 139 140 // Reset discards all bytes added to h. 141 // (The seed remains the same.) 142 func (h *Hash) Reset() { 143 h.initSeed() 144 h.state = h.seed 145 h.n = 0 146 } 147 148 // precondition: buffer is full. 
149 func (h *Hash) flush() { 150 if h.n != len(h.buf) { 151 panic("maphash: flush of partially full buffer") 152 } 153 h.initSeed() 154 h.state.s = rthash(h.buf[:], h.state.s) 155 h.n = 0 156 } 157 158 // Sum64 returns h's current 64-bit value, which depends on 159 // h's seed and the sequence of bytes added to h since the 160 // last call to Reset or SetSeed. 161 // 162 // All bits of the Sum64 result are close to uniformly and 163 // independently distributed, so it can be safely reduced 164 // by using bit masking, shifting, or modular arithmetic. 165 func (h *Hash) Sum64() uint64 { 166 h.initSeed() 167 return rthash(h.buf[:h.n], h.state.s) 168 } 169 170 // MakeSeed returns a new random seed. 171 func MakeSeed() Seed { 172 var s1, s2 uint64 173 for { 174 s1 = uint64(runtime_fastrand()) 175 s2 = uint64(runtime_fastrand()) 176 // We use seed 0 to indicate an uninitialized seed/hash, 177 // so keep trying until we get a non-zero seed. 178 if s1|s2 != 0 { 179 break 180 } 181 } 182 return Seed{s: s1<<32 + s2} 183 } 184 185 //go:linkname runtime_fastrand runtime.fastrand 186 func runtime_fastrand() uint32 187 188 func rthash(b []byte, seed uint64) uint64 { 189 if len(b) == 0 { 190 return seed 191 } 192 // The runtime hasher only works on uintptr. For 64-bit 193 // architectures, we use the hasher directly. Otherwise, 194 // we use two parallel hashers on the lower and upper 32 bits. 195 if unsafe.Sizeof(uintptr(0)) == 8 { 196 return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))) 197 } 198 lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))) 199 hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b))) 200 return uint64(hi)<<32 | uint64(lo) 201 } 202 203 //go:linkname runtime_memhash runtime.memhash 204 //go:noescape 205 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr 206 207 // Sum appends the hash's current 64-bit value to b. 208 // It exists for implementing hash.Hash. 
209 // For direct calls, it is more efficient to use Sum64. 210 func (h *Hash) Sum(b []byte) []byte { 211 x := h.Sum64() 212 return append(b, 213 byte(x>>0), 214 byte(x>>8), 215 byte(x>>16), 216 byte(x>>24), 217 byte(x>>32), 218 byte(x>>40), 219 byte(x>>48), 220 byte(x>>56)) 221 } 222 223 // Size returns h's hash value size, 8 bytes. 224 func (h *Hash) Size() int { return 8 } 225 226 // BlockSize returns h's block size. 227 func (h *Hash) BlockSize() int { return len(h.buf) }