github.com/scottcagno/storage@v1.8.0/pkg/hash/cityhash/cityhash.go (about) 1 /* 2 * // Copyright (c) 2021. Scott Cagno. All rights reserved. 3 * // The license can be found in the root of this project; see LICENSE. 4 */ 5 6 package cityhash 7 8 // Some primes between 2^63 and 2^64 for various uses. 9 const ( 10 k0 = uint64(0xc3a5c85c97cb3127) 11 k1 = uint64(0xb492b66fbe98f273) 12 k2 = uint64(0x9ae16a3b2f90404f) 13 ) 14 15 // Magic numbers for 32-bit hashing. Copied from Murmur3. 16 const ( 17 c1 = uint32(0xcc9e2d51) 18 c2 = uint32(0x1b873593) 19 ) 20 21 // Hash64 returns a 64-bit hash for a slice of bytes. 22 func Hash64(s []byte) uint64 { 23 n := uint64(len(s)) 24 if n <= 32 { 25 if n <= 16 { 26 return hash64Len0to16(s) 27 } 28 return hash64Len17to32(s) 29 } else if n <= 64 { 30 return hash64Len33to64(s) 31 } 32 33 // For strings over 64 bytes we hash the end first, and then as we loop we 34 // keep 56 bytes of state: v, w, x, y, and z. 35 x := fetch64(s[n-40:]) 36 y := fetch64(s[n-16:]) + fetch64(s[n-56:]) 37 z := hash64Len16(fetch64(s[n-48:])+n, fetch64(s[n-24:])) 38 39 v1, v2 := weakHashLen32WithSeeds(s[n-64:], n, z) 40 w1, w2 := weakHashLen32WithSeeds(s[n-32:], y+k1, x) 41 x = x*k1 + fetch64(s) 42 43 // Decrease n to the nearest multiple of 64, and operate on 64-byte chunks. 44 n = (n - 1) &^ 63 45 for { 46 x = ror64(x+y+v1+fetch64(s[8:]), 37) * k1 47 y = ror64(y+v2+fetch64(s[48:]), 42) * k1 48 x ^= w2 49 y += v1 + fetch64(s[40:]) 50 z = ror64(z+w1, 33) * k1 51 v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1) 52 w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s[16:])) 53 z, x = x, z 54 s = s[64:] 55 n -= 64 56 if n == 0 { 57 break 58 } 59 } 60 return hash64Len16(hash64Len16(v1, w1)+shiftMix(y)*k1+z, hash64Len16(v2, w2)+x) 61 } 62 63 // Hash64WithSeed returns a 64-bit hash for s that includes seed. 64 func Hash64WithSeed(s []byte, seed uint64) uint64 { 65 return Hash64WithSeeds(s, k2, seed) 66 } 67 68 // Hash64WithSeeds returns a 64-bit hash for s that includes the two seed 69 // values. 70 func Hash64WithSeeds(s []byte, seed0, seed1 uint64) uint64 { 71 return hash64Len16(Hash64(s)-seed0, seed1) 72 } 73 74 // Hash32 returns a 32-bit hash for s. 75 func Hash32(s []byte) uint32 { 76 n := uint32(len(s)) 77 if n <= 24 { 78 if n <= 12 { 79 if n <= 4 { 80 return hash32Len0to4(s) 81 } 82 return hash32Len5to12(s) 83 } 84 return hash32Len13to24(s) 85 } 86 87 // n > 24 88 h := n 89 g := c1 * n 90 f := g 91 92 a0 := ror32(fetch32(s[n-4:])*c1, 17) * c2 93 a1 := ror32(fetch32(s[n-8:])*c1, 17) * c2 94 a2 := ror32(fetch32(s[n-16:])*c1, 17) * c2 95 a3 := ror32(fetch32(s[n-12:])*c1, 17) * c2 96 a4 := ror32(fetch32(s[n-20:])*c1, 17) * c2 97 98 const magic = 0xe6546b64 99 h ^= a0 100 h = ror32(h, 19) 101 h = h*5 + magic 102 h ^= a2 103 h = ror32(h, 19) 104 h = h*5 + magic 105 g ^= a1 106 g = ror32(g, 19) 107 g = g*5 + magic 108 g ^= a3 109 g = ror32(g, 19) 110 g = g*5 + magic 111 f += a4 112 f = ror32(f, 19) 113 f = f*5 + magic 114 for i := (n - 1) / 20; i != 0; i-- { 115 a0 := ror32(fetch32(s)*c1, 17) * c2 116 a1 := fetch32(s[4:]) 117 a2 := ror32(fetch32(s[8:])*c1, 17) * c2 118 a3 := ror32(fetch32(s[12:])*c1, 17) * c2 119 a4 := fetch32(s[16:]) 120 h ^= a0 121 h = ror32(h, 18) 122 h = h*5 + magic 123 f += a1 124 f = ror32(f, 19) 125 f = f * c1 126 g += a2 127 g = ror32(g, 18) 128 g = g*5 + magic 129 h ^= a3 + a1 130 h = ror32(h, 19) 131 h = h*5 + magic 132 g ^= a4 133 g = bswap32(g) * 5 134 h += a4 * 5 135 h = bswap32(h) 136 f += a0 137 f, g, h = g, h, f // a.k.a. PERMUTE3 138 s = s[20:] 139 } 140 g = ror32(g, 11) * c1 141 g = ror32(g, 17) * c1 142 f = ror32(f, 11) * c1 143 f = ror32(f, 17) * c1 144 h = ror32(h+g, 19) 145 h = h*5 + magic 146 h = ror32(h, 17) * c1 147 h = ror32(h+f, 19) 148 h = h*5 + magic 149 h = ror32(h, 17) * c1 150 return h 151 } 152 153 // Hash128 returns a 128-bit hash value for s. 154 func Hash128(s []byte) (lo, hi uint64) { 155 if len(s) >= 16 { 156 return Hash128WithSeed(s[16:], fetch64(s), fetch64(s[8:])+k0) 157 } 158 return Hash128WithSeed(s, k0, k1) 159 } 160 161 // Hash128WithSeed returns a 128-bit hash value for s that includes the given 162 // 128-bit seed. 163 func Hash128WithSeed(s []byte, seed0, seed1 uint64) (lo, hi uint64) { 164 if len(s) < 128 { 165 return cityMurmur(s, seed0, seed1) 166 } 167 168 // We expect len >= 128 to be the common case. 169 // Keep 56 bytes of state: v, w, x, y, and z. 170 n := uint64(len(s)) 171 x := seed0 172 y := seed1 173 z := n * k1 174 v1 := ror64(y^k1, 49)*k1 + fetch64(s) 175 v2 := ror64(v1, 42)*k1 + fetch64(s[8:]) 176 w1 := ror64(y+z, 35)*k1 + x 177 w2 := ror64(x+fetch64(s[88:]), 53) * k1 178 179 // This is the same inner loop as Hash64, manually unrolled. 180 t := s 181 for n >= 128 { 182 // Iteration 1 183 x = ror64(x+y+v1+fetch64(t[8:]), 37) * k1 184 y = ror64(y+v2+fetch64(t[48:]), 42) * k1 185 x ^= w2 186 y += v1 + fetch64(t[40:]) 187 z = ror64(z+w1, 33) * k1 188 v1, v2 = weakHashLen32WithSeeds(t, v2*k1, x+w1) 189 w1, w2 = weakHashLen32WithSeeds(t[32:], z+w2, y+fetch64(t[16:])) 190 x, z = z, x 191 t = t[64:] 192 193 // Iteration 2 194 x = ror64(x+y+v1+fetch64(t[8:]), 37) * k1 195 y = ror64(y+v2+fetch64(t[48:]), 42) * k1 196 x ^= w2 197 y += v1 + fetch64(t[40:]) 198 z = ror64(z+w1, 33) * k1 199 v1, v2 = weakHashLen32WithSeeds(t, v2*k1, x+w1) 200 w1, w2 = weakHashLen32WithSeeds(t[32:], z+w2, y+fetch64(t[16:])) 201 x, z = z, x 202 t = t[64:] 203 204 n -= 128 205 } 206 x += ror64(v1+z, 49) * k0 207 y = y*k0 + ror64(w2, 37) 208 z = z*k0 + ror64(w1, 27) 209 w1 *= 9 210 v1 *= k0 211 212 // Here, unlike in Hash64, we didn't do the tail block ahead of time. 213 // We hash in 32-byte blocks working back-to-front, including as many bytes 214 // as necessary from the chunk prior to t to ensure we have a whole number 215 // of blocks. 216 tail := s[len(s)-128:] 217 for pos := 0; pos < int(n); pos += 32 { 218 offset := len(tail) - pos - 32 219 block := tail[offset:] 220 221 y = ror64(x+y, 42)*k0 + v2 222 w1 += fetch64(block[16:]) 223 x = x*k0 + w1 224 z += w2 + fetch64(block) 225 w2 += v1 226 v1, v2 = weakHashLen32WithSeeds(block, v1+z, v2) 227 v1 *= k0 228 } 229 230 // At this point our 56 bytes of state should contain more than 231 // enough information for a strong 128-bit hash. We use two 232 // different 56-byte-to-8-byte hashes to get a 16-byte final result. 233 x = hash64Len16(x, v1) 234 y = hash64Len16(y+z, w1) 235 return hash64Len16(x+v2, w2) + y, hash64Len16(x+w2, y+v2) 236 } 237 238 // Hash128To64 returns a 64-bit hash value for an input of 128 bits. 239 func Hash128To64(lo, hi uint64) uint64 { 240 // Murmur-inspired hashing. 241 const multiplier = 0x9ddfea08eb382d69 242 243 a := (lo ^ hi) * multiplier 244 a ^= a >> 47 245 b := (hi ^ a) * multiplier 246 b ^= b >> 47 247 b *= multiplier 248 return b 249 }