github.com/scottcagno/storage@v1.8.0/pkg/hash/cityhash/cityhash.go (about)

     1  /*
     2   * // Copyright (c) 2021. Scott Cagno. All rights reserved.
     3   * // The license can be found in the root of this project; see LICENSE.
     4   */
     5  
     6  package cityhash
     7  
     8  // Some primes between 2^63 and 2^64 for various uses.
     9  const (
    10  	k0 = uint64(0xc3a5c85c97cb3127)
    11  	k1 = uint64(0xb492b66fbe98f273)
    12  	k2 = uint64(0x9ae16a3b2f90404f)
    13  )
    14  
    15  // Magic numbers for 32-bit hashing.  Copied from Murmur3.
    16  const (
    17  	c1 = uint32(0xcc9e2d51)
    18  	c2 = uint32(0x1b873593)
    19  )
    20  
    21  // Hash64 returns a 64-bit hash for a slice of bytes.
    22  func Hash64(s []byte) uint64 {
    23  	n := uint64(len(s))
    24  	if n <= 32 {
    25  		if n <= 16 {
    26  			return hash64Len0to16(s)
    27  		}
    28  		return hash64Len17to32(s)
    29  	} else if n <= 64 {
    30  		return hash64Len33to64(s)
    31  	}
    32  
    33  	// For strings over 64 bytes we hash the end first, and then as we loop we
    34  	// keep 56 bytes of state: v, w, x, y, and z.
    35  	x := fetch64(s[n-40:])
    36  	y := fetch64(s[n-16:]) + fetch64(s[n-56:])
    37  	z := hash64Len16(fetch64(s[n-48:])+n, fetch64(s[n-24:]))
    38  
    39  	v1, v2 := weakHashLen32WithSeeds(s[n-64:], n, z)
    40  	w1, w2 := weakHashLen32WithSeeds(s[n-32:], y+k1, x)
    41  	x = x*k1 + fetch64(s)
    42  
    43  	// Decrease n to the nearest multiple of 64, and operate on 64-byte chunks.
    44  	n = (n - 1) &^ 63
    45  	for {
    46  		x = ror64(x+y+v1+fetch64(s[8:]), 37) * k1
    47  		y = ror64(y+v2+fetch64(s[48:]), 42) * k1
    48  		x ^= w2
    49  		y += v1 + fetch64(s[40:])
    50  		z = ror64(z+w1, 33) * k1
    51  		v1, v2 = weakHashLen32WithSeeds(s, v2*k1, x+w1)
    52  		w1, w2 = weakHashLen32WithSeeds(s[32:], z+w2, y+fetch64(s[16:]))
    53  		z, x = x, z
    54  		s = s[64:]
    55  		n -= 64
    56  		if n == 0 {
    57  			break
    58  		}
    59  	}
    60  	return hash64Len16(hash64Len16(v1, w1)+shiftMix(y)*k1+z, hash64Len16(v2, w2)+x)
    61  }
    62  
    63  // Hash64WithSeed returns a 64-bit hash for s that includes seed.
    64  func Hash64WithSeed(s []byte, seed uint64) uint64 {
    65  	return Hash64WithSeeds(s, k2, seed)
    66  }
    67  
    68  // Hash64WithSeeds returns a 64-bit hash for s that includes the two seed
    69  // values.
    70  func Hash64WithSeeds(s []byte, seed0, seed1 uint64) uint64 {
    71  	return hash64Len16(Hash64(s)-seed0, seed1)
    72  }
    73  
    74  // Hash32 returns a 32-bit hash for s.
    75  func Hash32(s []byte) uint32 {
    76  	n := uint32(len(s))
    77  	if n <= 24 {
    78  		if n <= 12 {
    79  			if n <= 4 {
    80  				return hash32Len0to4(s)
    81  			}
    82  			return hash32Len5to12(s)
    83  		}
    84  		return hash32Len13to24(s)
    85  	}
    86  
    87  	// n > 24
    88  	h := n
    89  	g := c1 * n
    90  	f := g
    91  
    92  	a0 := ror32(fetch32(s[n-4:])*c1, 17) * c2
    93  	a1 := ror32(fetch32(s[n-8:])*c1, 17) * c2
    94  	a2 := ror32(fetch32(s[n-16:])*c1, 17) * c2
    95  	a3 := ror32(fetch32(s[n-12:])*c1, 17) * c2
    96  	a4 := ror32(fetch32(s[n-20:])*c1, 17) * c2
    97  
    98  	const magic = 0xe6546b64
    99  	h ^= a0
   100  	h = ror32(h, 19)
   101  	h = h*5 + magic
   102  	h ^= a2
   103  	h = ror32(h, 19)
   104  	h = h*5 + magic
   105  	g ^= a1
   106  	g = ror32(g, 19)
   107  	g = g*5 + magic
   108  	g ^= a3
   109  	g = ror32(g, 19)
   110  	g = g*5 + magic
   111  	f += a4
   112  	f = ror32(f, 19)
   113  	f = f*5 + magic
   114  	for i := (n - 1) / 20; i != 0; i-- {
   115  		a0 := ror32(fetch32(s)*c1, 17) * c2
   116  		a1 := fetch32(s[4:])
   117  		a2 := ror32(fetch32(s[8:])*c1, 17) * c2
   118  		a3 := ror32(fetch32(s[12:])*c1, 17) * c2
   119  		a4 := fetch32(s[16:])
   120  		h ^= a0
   121  		h = ror32(h, 18)
   122  		h = h*5 + magic
   123  		f += a1
   124  		f = ror32(f, 19)
   125  		f = f * c1
   126  		g += a2
   127  		g = ror32(g, 18)
   128  		g = g*5 + magic
   129  		h ^= a3 + a1
   130  		h = ror32(h, 19)
   131  		h = h*5 + magic
   132  		g ^= a4
   133  		g = bswap32(g) * 5
   134  		h += a4 * 5
   135  		h = bswap32(h)
   136  		f += a0
   137  		f, g, h = g, h, f // a.k.a. PERMUTE3
   138  		s = s[20:]
   139  	}
   140  	g = ror32(g, 11) * c1
   141  	g = ror32(g, 17) * c1
   142  	f = ror32(f, 11) * c1
   143  	f = ror32(f, 17) * c1
   144  	h = ror32(h+g, 19)
   145  	h = h*5 + magic
   146  	h = ror32(h, 17) * c1
   147  	h = ror32(h+f, 19)
   148  	h = h*5 + magic
   149  	h = ror32(h, 17) * c1
   150  	return h
   151  }
   152  
   153  // Hash128 returns a 128-bit hash value for s.
   154  func Hash128(s []byte) (lo, hi uint64) {
   155  	if len(s) >= 16 {
   156  		return Hash128WithSeed(s[16:], fetch64(s), fetch64(s[8:])+k0)
   157  	}
   158  	return Hash128WithSeed(s, k0, k1)
   159  }
   160  
   161  // Hash128WithSeed returns a 128-bit hash value for s that includes the given
   162  // 128-bit seed.
   163  func Hash128WithSeed(s []byte, seed0, seed1 uint64) (lo, hi uint64) {
   164  	if len(s) < 128 {
   165  		return cityMurmur(s, seed0, seed1)
   166  	}
   167  
   168  	// We expect len >= 128 to be the common case.
   169  	// Keep 56 bytes of state: v, w, x, y, and z.
   170  	n := uint64(len(s))
   171  	x := seed0
   172  	y := seed1
   173  	z := n * k1
   174  	v1 := ror64(y^k1, 49)*k1 + fetch64(s)
   175  	v2 := ror64(v1, 42)*k1 + fetch64(s[8:])
   176  	w1 := ror64(y+z, 35)*k1 + x
   177  	w2 := ror64(x+fetch64(s[88:]), 53) * k1
   178  
   179  	// This is the same inner loop as Hash64, manually unrolled.
   180  	t := s
   181  	for n >= 128 {
   182  		// Iteration 1
   183  		x = ror64(x+y+v1+fetch64(t[8:]), 37) * k1
   184  		y = ror64(y+v2+fetch64(t[48:]), 42) * k1
   185  		x ^= w2
   186  		y += v1 + fetch64(t[40:])
   187  		z = ror64(z+w1, 33) * k1
   188  		v1, v2 = weakHashLen32WithSeeds(t, v2*k1, x+w1)
   189  		w1, w2 = weakHashLen32WithSeeds(t[32:], z+w2, y+fetch64(t[16:]))
   190  		x, z = z, x
   191  		t = t[64:]
   192  
   193  		// Iteration 2
   194  		x = ror64(x+y+v1+fetch64(t[8:]), 37) * k1
   195  		y = ror64(y+v2+fetch64(t[48:]), 42) * k1
   196  		x ^= w2
   197  		y += v1 + fetch64(t[40:])
   198  		z = ror64(z+w1, 33) * k1
   199  		v1, v2 = weakHashLen32WithSeeds(t, v2*k1, x+w1)
   200  		w1, w2 = weakHashLen32WithSeeds(t[32:], z+w2, y+fetch64(t[16:]))
   201  		x, z = z, x
   202  		t = t[64:]
   203  
   204  		n -= 128
   205  	}
   206  	x += ror64(v1+z, 49) * k0
   207  	y = y*k0 + ror64(w2, 37)
   208  	z = z*k0 + ror64(w1, 27)
   209  	w1 *= 9
   210  	v1 *= k0
   211  
   212  	// Here, unlike in Hash64, we didn't do the tail block ahead of time.
   213  	// We hash in 32-byte blocks working back-to-front, including as many bytes
   214  	// as necessary from the chunk prior to t to ensure we have a whole number
   215  	// of blocks.
   216  	tail := s[len(s)-128:]
   217  	for pos := 0; pos < int(n); pos += 32 {
   218  		offset := len(tail) - pos - 32
   219  		block := tail[offset:]
   220  
   221  		y = ror64(x+y, 42)*k0 + v2
   222  		w1 += fetch64(block[16:])
   223  		x = x*k0 + w1
   224  		z += w2 + fetch64(block)
   225  		w2 += v1
   226  		v1, v2 = weakHashLen32WithSeeds(block, v1+z, v2)
   227  		v1 *= k0
   228  	}
   229  
   230  	// At this point our 56 bytes of state should contain more than
   231  	// enough information for a strong 128-bit hash.  We use two
   232  	// different 56-byte-to-8-byte hashes to get a 16-byte final result.
   233  	x = hash64Len16(x, v1)
   234  	y = hash64Len16(y+z, w1)
   235  	return hash64Len16(x+v2, w2) + y, hash64Len16(x+w2, y+v2)
   236  }
   237  
   238  // Hash128To64 returns a 64-bit hash value for an input of 128 bits.
   239  func Hash128To64(lo, hi uint64) uint64 {
   240  	// Murmur-inspired hashing.
   241  	const multiplier = 0x9ddfea08eb382d69
   242  
   243  	a := (lo ^ hi) * multiplier
   244  	a ^= a >> 47
   245  	b := (hi ^ a) * multiplier
   246  	b ^= b >> 47
   247  	b *= multiplier
   248  	return b
   249  }