github.com/rolandhe/saber@v0.0.4/hash/cityhash.go (about) 1 // Golang concurrent tools like java juc. 2 // 3 // Copyright 2023 The saber Authors. All rights reserved. 4 5 // Package hash, 实现常见的hash算法,当前实现 cityhash 算法,未来会扩展其他hash 算法。 6 // 当前cityhash 完全移植c++版本的cityhash算法, 由于没有找到对_mm_crc32_u64的支持,所以不支持CityHashCrc256, 后续找到办法后会继续支持 7 package hash 8 9 import ( 10 "github.com/rolandhe/saber/utils/strutil" 11 "math/bits" 12 "unsafe" 13 ) 14 15 // 判断当前系统的大小端属性 16 var littleEndian bool 17 18 const k0 uint64 = 0xc3a5c85c97cb3127 19 const k1 uint64 = 0xb492b66fbe98f273 20 const k2 uint64 = 0x9ae16a3b2f90404f 21 const kMul uint64 = 0x9ddfea08eb382d69 22 23 // Magic numbers for 32-bit hashing. Copied from Murmur3. 24 const c1 uint32 = 0xcc9e2d51 25 const c2 uint32 = 0x1b873593 26 27 func init() { 28 littleEndian = IsLittleEndian() 29 } 30 31 // Uint128 描述 128位无符号整数,它本质上由两个 uint64组成 32 type Uint128 struct { 33 low uint64 34 high uint64 35 } 36 37 // MakeUint128 构建一个无符号128位整数对象,需要低位、高位 uint64两个参数 38 func MakeUint128(low uint64, high uint64) *Uint128 { 39 return &Uint128{ 40 low, high, 41 } 42 } 43 44 // IsLittleEndian 判断当前系统的字节序是否是小端 45 func IsLittleEndian() bool { 46 n := 0x1234 47 f := *((*byte)(unsafe.Pointer(&n))) 48 return (f ^ 0x34) == 0 49 } 50 51 func fetch64(data []byte) uint64 { 52 v := uint64(data[0]) 53 v |= uint64(data[1]) << 8 54 v |= uint64(data[2]) << 16 55 v |= uint64(data[3]) << 24 56 v |= uint64(data[4]) << 32 57 v |= uint64(data[5]) << 40 58 v |= uint64(data[6]) << 48 59 v |= uint64(data[7]) << 56 60 if littleEndian { 61 return v 62 } 63 64 return bits.ReverseBytes64(v) 65 } 66 67 func fetch32(data []byte) uint32 { 68 v := uint32(data[0]) 69 v |= uint32(data[1]) << 8 70 v |= uint32(data[2]) << 16 71 v |= uint32(data[3]) << 24 72 73 if littleEndian { 74 return v 75 } 76 77 return bits.ReverseBytes32(v) 78 } 79 80 func fmix(h uint32) uint32 { 81 h ^= h >> 16 82 h *= 0x85ebca6b 83 h ^= h >> 13 84 h *= 0xc2b2ae35 85 h ^= h >> 16 86 return h 87 } 88 89 func rotate32(val uint32, shift int) uint32 { 90 // Avoid shifting by 32: doing so yields an undefined result. 91 if shift == 0 { 92 return val 93 } 94 return (val >> shift) | (val << (32 - shift)) 95 } 96 97 func mur(a uint32, h uint32) uint32 { 98 // Helper from Murmur3 for combining two 32-bit values. 99 a *= c1 100 a = rotate32(a, 17) 101 a *= c2 102 h ^= a 103 h = rotate32(h, 19) 104 return h*5 + 0xe6546b64 105 } 106 107 func hash32Len13to24(s []byte, len uint) uint32 { 108 a := fetch32(s[len>>1-4:]) 109 b := fetch32(s[4:]) 110 c := fetch32(s[len-8:]) 111 d := fetch32(s[len>>1:]) 112 e := fetch32(s) 113 f := fetch32(s[len-4:]) 114 h := uint32(len) 115 return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, h))))))) 116 } 117 118 func hash32Len0to4(s []byte, len uint) uint32 { 119 b := uint32(0) 120 c := uint32(9) 121 for i := uint(0); i < len; i++ { 122 v := int8(s[i]) 123 b = b*c1 + uint32(v) 124 c ^= b 125 } 126 return fmix(mur(b, mur(uint32(len), c))) 127 } 128 129 func hash32Len5to12(s []byte, len uint) uint32 { 130 a := uint32(len) 131 b := a * 5 132 c := uint32(9) 133 d := b 134 a += fetch32(s) 135 b += fetch32(s[len-4:]) 136 pos := (len >> 1) & 4 137 c += fetch32(s[pos:]) 138 return fmix(mur(c, mur(b, mur(a, d)))) 139 } 140 141 func CityHash32String(str string) uint32 { 142 s := strutil.DetachBytesString(str) 143 length := uint(len(str)) 144 return CityHash32(s, length) 145 } 146 147 // CityHash32 产生32位的hash 148 func CityHash32(s []byte, length uint) uint32 { 149 if length <= 24 { 150 if length <= 12 { 151 if length <= 4 { 152 return hash32Len0to4(s, length) 153 } else { 154 return hash32Len5to12(s, length) 155 } 156 } else { 157 return hash32Len13to24(s, length) 158 } 159 } 160 161 // length > 24 162 h := uint32(length) 163 g := c1 * h 164 f := g 165 a0 := rotate32(fetch32(s[length-4:])*c1, 17) * c2 166 a1 := rotate32(fetch32(s[length-8:])*c1, 17) * c2 167 a2 := rotate32(fetch32(s[length-16:])*c1, 17) * c2 168 a3 := rotate32(fetch32(s[length-12:])*c1, 17) * c2 169 a4 := rotate32(fetch32(s[length-20:])*c1, 17) * c2 170 h ^= a0 171 h = rotate32(h, 19) 172 h = h*5 + 0xe6546b64 173 h ^= a2 174 h = rotate32(h, 19) 175 h = h*5 + 0xe6546b64 176 g ^= a1 177 g = rotate32(g, 19) 178 g = g*5 + 0xe6546b64 179 g ^= a3 180 g = rotate32(g, 19) 181 g = g*5 + 0xe6546b64 182 f += a4 183 f = rotate32(f, 19) 184 f = f*5 + 0xe6546b64 185 iters := (length - 1) / 20 186 for { 187 a0 = rotate32(fetch32(s)*c1, 17) * c2 188 a1 = fetch32(s[4:]) 189 a2 = rotate32(fetch32(s[8:])*c1, 17) * c2 190 a3 = rotate32(fetch32(s[12:])*c1, 17) * c2 191 a4 = fetch32(s[16:]) 192 h ^= a0 193 h = rotate32(h, 18) 194 h = h*5 + 0xe6546b64 195 f += a1 196 f = rotate32(f, 19) 197 f = f * c1 198 g += a2 199 g = rotate32(g, 18) 200 g = g*5 + 0xe6546b64 201 h ^= a3 + a1 202 h = rotate32(h, 19) 203 h = h*5 + 0xe6546b64 204 g ^= a4 205 g = bits.ReverseBytes32(g) * 5 206 h += a4 * 5 207 h = bits.ReverseBytes32(h) 208 f += a0 209 f, h, g = g, f, h 210 s = s[20:] 211 iters-- 212 if iters == 0 { 213 break 214 } 215 } 216 217 g = rotate32(g, 11) * c1 218 g = rotate32(g, 17) * c1 219 f = rotate32(f, 11) * c1 220 f = rotate32(f, 17) * c1 221 h = rotate32(h+g, 19) 222 h = h*5 + 0xe6546b64 223 h = rotate32(h, 17) * c1 224 h = rotate32(h+f, 19) 225 h = h*5 + 0xe6546b64 226 h = rotate32(h, 17) * c1 227 return h 228 } 229 230 func rotate64(val uint64, shift int) uint64 { 231 // Avoid shifting by 64: doing so yields an undefined result. 232 if shift == 0 { 233 return val 234 } 235 return (val >> shift) | (val << (64 - shift)) 236 } 237 238 func shiftMix(val uint64) uint64 { 239 return val ^ (val >> 47) 240 } 241 242 func hash128to64(u uint64, v uint64) uint64 { 243 // Murmur-inspired hashing. 244 a := (u ^ v) * kMul 245 a ^= a >> 47 246 b := (v ^ a) * kMul 247 b ^= b >> 47 248 b *= kMul 249 return b 250 } 251 252 func hashLen16(u uint64, v uint64) uint64 { 253 return hash128to64(u, v) 254 } 255 256 func hashLen16WithMul(u uint64, v uint64, mul uint64) uint64 { 257 // Murmur-inspired hashing. 258 a := (u ^ v) * mul 259 a ^= a >> 47 260 b := (v ^ a) * mul 261 b ^= b >> 47 262 b *= mul 263 return b 264 } 265 266 func hashLen0to16(s []byte, len uint) uint64 { 267 if len >= 8 { 268 mul := k2 + uint64(len)*2 269 a := fetch64(s) + k2 270 b := fetch64(s[len-8:]) 271 c := rotate64(b, 37)*mul + a 272 d := (rotate64(a, 25) + b) * mul 273 return hashLen16WithMul(c, d, mul) 274 } 275 if len >= 4 { 276 mul := k2 + uint64(len)*2 277 a := uint64(fetch32(s)) 278 return hashLen16WithMul(uint64(len)+(a<<3), uint64(fetch32(s[len-4:])), mul) 279 } 280 if len > 0 { 281 a := s[0] 282 b := s[len>>1] 283 c := s[len-1] 284 y := uint32(a) + (uint32(b) << 8) 285 z := uint32(len) + (uint32(c) << 2) 286 return shiftMix(uint64(y)*k2^uint64(z)*k0) * k2 287 } 288 return k2 289 } 290 291 // This probably works well for 16-byte strings as well, but it may be over kill 292 // in that case. 293 func hashLen17to32(s []byte, len uint) uint64 { 294 mul := k2 + uint64(len)*2 295 a := fetch64(s) * k1 296 b := fetch64(s[8:]) 297 c := fetch64(s[len-8:]) * mul 298 d := fetch64(s[len-16:]) * k2 299 return hashLen16WithMul(rotate64(a+b, 43)+rotate64(c, 30)+d, 300 a+rotate64(b+k2, 18)+c, mul) 301 } 302 303 // Return a 16-byte hash for 48 bytes. Quick and dirty. 304 // Callers do best to use "random-looking" values for a and b. 305 func weakHashLen32WithSeedsBaseNumber( 306 w uint64, x uint64, y uint64, z uint64, a uint64, b uint64) *Uint128 { 307 a += w 308 b = rotate64(b+a+z, 21) 309 c := a 310 a += x 311 a += y 312 b += rotate64(a, 44) 313 return MakeUint128(a+z, b+c) 314 } 315 316 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 317 func weakHashLen32WithSeeds( 318 s []byte, a uint64, b uint64) *Uint128 { 319 return weakHashLen32WithSeedsBaseNumber(fetch64(s), 320 fetch64(s[8:]), 321 fetch64(s[16:]), 322 fetch64(s[24:]), 323 a, 324 b) 325 } 326 327 func hashLen33to64(s []byte, length uint) uint64 { 328 mul := k2 + uint64(length)*2 329 a := fetch64(s) * k2 330 b := fetch64(s[8:]) 331 c := fetch64(s[length-24:]) 332 d := fetch64(s[length-32:]) 333 e := fetch64(s[16:]) * k2 334 f := fetch64(s[24:]) * 9 335 g := fetch64(s[length-8:]) 336 h := fetch64(s[length-16:]) * mul 337 u := rotate64(a+g, 43) + (rotate64(b, 30)+c)*9 338 v := ((a + g) ^ d) + f + 1 339 w := bits.ReverseBytes64((u+v)*mul) + h 340 x := rotate64(e+f, 42) + c 341 y := (bits.ReverseBytes64((v+w)*mul) + g) * mul 342 z := e + f + c 343 a = bits.ReverseBytes64((x+z)*mul+y) + b 344 b = shiftMix((z+a)*mul+d+h) * mul 345 return b + x 346 } 347 348 func CityHash64String(str string) uint64 { 349 s := strutil.DetachBytesString(str) 350 length := uint(len(str)) 351 return CityHash64(s, length) 352 } 353 354 // CityHash64 产生64位的hash 355 func CityHash64(s []byte, length uint) uint64 { 356 if length <= 32 { 357 if length <= 16 { 358 return hashLen0to16(s, length) 359 } else { 360 return hashLen17to32(s, length) 361 } 362 } else if length <= 64 { 363 return hashLen33to64(s, length) 364 } 365 366 // For strings over 64 bytes we hash the end first, and then as we 367 // loop we keep 56 bytes of state: v, w, x, y, and z. 368 x := fetch64(s[length-40:]) 369 y := fetch64(s[length-16:]) + fetch64(s[length-56:]) 370 z := hashLen16(fetch64(s[length-48:])+uint64(length), fetch64(s[length-24:])) 371 v := weakHashLen32WithSeeds(s[length-64:], uint64(length), z) 372 w := weakHashLen32WithSeeds(s[length-32:], y+k1, x) 373 x = x*k1 + fetch64(s) 374 375 // Decrease length to the nearest multiple of 64, and operate on 64-byte chunks. 376 slen := int(length) 377 slen = (slen - 1) & ^63 378 for { 379 x = rotate64(x+y+v.low+fetch64(s[8:]), 37) * k1 380 y = rotate64(y+v.high+fetch64(s[48:]), 42) * k1 381 x ^= w.high 382 y += v.low + fetch64(s[40:]) 383 z = rotate64(z+w.low, 33) * k1 384 v = weakHashLen32WithSeeds(s, v.high*k1, x+w.low) 385 w = weakHashLen32WithSeeds(s[32:], z+w.high, y+fetch64(s[16:])) 386 z, x = x, z 387 s = s[64:] 388 slen -= 64 389 if slen == 0 { 390 break 391 } 392 } 393 394 return hashLen16(hashLen16(v.low, w.low)+shiftMix(y)*k1+z, 395 hashLen16(v.high, w.high)+x) 396 } 397 398 // cityMurmur A subroutine for CityHash128(). Returns a decent 128-bit hash for strings 399 // of any length representable in signed long. Based on City and Murmur. 400 func cityMurmur(s []byte, len uint, seed *Uint128) *Uint128 { 401 a := seed.low 402 b := seed.high 403 c := uint64(0) 404 d := uint64(0) 405 if len <= 16 { 406 a = shiftMix(a*k1) * k1 407 c = b*k1 + hashLen0to16(s, len) 408 cv := c 409 if len >= 8 { 410 cv = fetch64(s) 411 } 412 d = shiftMix(a + cv) 413 } else { 414 c = hashLen16(fetch64(s[len-8:])+k1, a) 415 d = hashLen16(b+uint64(len), c+fetch64(s[len-16:])) 416 a += d 417 // len > 16 here, so do...while is safe 418 for { 419 a ^= shiftMix(fetch64(s)*k1) * k1 420 a *= k1 421 b ^= a 422 c ^= shiftMix(fetch64(s[8:])*k1) * k1 423 c *= k1 424 d ^= c 425 s = s[16:] 426 len -= 16 427 if len <= 16 { 428 break 429 } 430 } 431 } 432 a = hashLen16(a, c) 433 b = hashLen16(d, b) 434 return MakeUint128(a^b, hashLen16(b, a)) 435 } 436 437 func cityHash128WithSeedCore(s []byte, length uint, seed *Uint128) *Uint128 { 438 if length < 128 { 439 return cityMurmur(s, length, seed) 440 } 441 442 // We expect length >= 128 to be the common case. Keep 56 bytes of state: 443 // v, w, x, y, and z. 444 var v Uint128 445 var w Uint128 446 x := seed.low 447 y := seed.high 448 z := uint64(length) * k1 449 v.low = rotate64(y^k1, 49)*k1 + fetch64(s) 450 v.high = rotate64(v.low, 42)*k1 + fetch64(s[8:]) 451 w.low = rotate64(y+z, 35)*k1 + x 452 w.high = rotate64(x+fetch64(s[88:]), 53) * k1 453 454 // This is the same inner loop as CityHash64(), manually unrolled. 455 for { 456 x = rotate64(x+y+v.low+fetch64(s[8:]), 37) * k1 457 y = rotate64(y+v.high+fetch64(s[48:]), 42) * k1 458 x ^= w.high 459 y += v.low + fetch64(s[40:]) 460 z = rotate64(z+w.low, 33) * k1 461 v = *weakHashLen32WithSeeds(s, v.high*k1, x+w.low) 462 w = *weakHashLen32WithSeeds(s[32:], z+w.high, y+fetch64(s[16:])) 463 z, x = x, z 464 s = s[64:] 465 x = rotate64(x+y+v.low+fetch64(s[8:]), 37) * k1 466 y = rotate64(y+v.high+fetch64(s[48:]), 42) * k1 467 x ^= w.high 468 y += v.low + fetch64(s[40:]) 469 z = rotate64(z+w.low, 33) * k1 470 v = *weakHashLen32WithSeeds(s, v.high*k1, x+w.low) 471 w = *weakHashLen32WithSeeds(s[32:], z+w.high, y+fetch64(s[16:])) 472 z, x = x, z 473 s = s[64:] 474 length -= 128 475 if length < 128 { 476 break 477 } 478 } 479 x += rotate64(v.low+z, 49) * k0 480 y = y*k0 + rotate64(w.high, 37) 481 z = z*k0 + rotate64(w.low, 27) 482 w.low *= 9 483 v.low *= k0 484 // If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s. 485 for tailDone := uint(0); tailDone < length; { 486 tailDone += 32 487 y = rotate64(x+y, 42)*k0 + v.high 488 w.low += fetch64(s[length-tailDone+16:]) 489 x = x*k0 + w.low 490 z += w.high + fetch64(s[length-tailDone:]) 491 w.high += v.low 492 v = *weakHashLen32WithSeeds(s[length-tailDone:], v.low+z, v.high) 493 v.low *= k0 494 } 495 // At this point our 56 bytes of state should contain more than 496 // enough information for a strong 128-bit hash. We use two 497 // different 56-byte-to-8-byte hashes to get a 16-byte final result. 498 x = hashLen16(x, v.low) 499 y = hashLen16(y+z, w.low) 500 return MakeUint128(hashLen16(x+v.high, w.high)+y, 501 hashLen16(x+w.high, y+v.high)) 502 } 503 504 // CityHash128String 对string算128位的cityhash值 505 func CityHash128String(str string) *Uint128 { 506 s := strutil.DetachBytesString(str) 507 length := uint(len(str)) 508 return CityHash128(s, length) 509 } 510 511 // CityHash128 产生128位的hash 512 func CityHash128(s []byte, length uint) *Uint128 { 513 if length >= 16 { 514 seed := MakeUint128(fetch64(s), fetch64(s[8:])+k0) 515 516 return cityHash128WithSeedCore(s[16:], length-16, seed) 517 } 518 return CityHash128WithSeed(s, length, MakeUint128(k0, k1)) 519 } 520 521 // CityHash128WithSeedString 计算指定 str 字符串的 128位hash, 需要指定 seed 522 func CityHash128WithSeedString(str string, seed *Uint128) *Uint128 { 523 s := strutil.DetachBytesString(str) 524 length := uint(len(str)) 525 return cityHash128WithSeedCore(s, length, seed) 526 } 527 528 // CityHash128WithSeed 计算指定 二进制数组的 128位hash, 需要指定 seed 529 func CityHash128WithSeed(s []byte, length uint, seed *Uint128) *Uint128 { 530 return cityHash128WithSeedCore(s, length, seed) 531 } 532 533 func CityHash64WithSeedString(str string, seed uint64) uint64 { 534 s := strutil.DetachBytesString(str) 535 length := uint(len(str)) 536 return CityHash64WithSeed(s, length, seed) 537 } 538 539 func CityHash64WithSeed(s []byte, length uint, seed uint64) uint64 { 540 return CityHash64WithSeeds(s, length, k2, seed) 541 } 542 543 func CityHash64WithSeedsString(str string, seed0 uint64, seed1 uint64) uint64 { 544 s := strutil.DetachBytesString(str) 545 length := uint(len(str)) 546 return CityHash64WithSeeds(s, length, seed0, seed1) 547 } 548 549 func CityHash64WithSeeds(s []byte, length uint, seed0 uint64, seed1 uint64) uint64 { 550 return hashLen16(CityHash64(s, length)-seed0, seed1) 551 }